summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFlorian Mayer <fmayer@google.com>2025-10-13 13:34:54 -0700
committerFlorian Mayer <fmayer@google.com>2025-10-13 13:34:54 -0700
commit4174aa6e06d94eae664e16cabb74c51596481eee (patch)
treeeaa1f13616d93e6ae5f067640da99ec4d62ae263
parent18bd65cd2f69f58b31e3789d1bffe2cf0a8c6d0c (diff)
parentc5bd68cbd522e6ac477d2b99a67d8b6054cb8bee (diff)
[𝘀𝗽𝗿] changes introduced through rebaseusers/fmayer/spr/main.flowsensitive-statusor-1n-add-mock-headers
Created using spr 1.3.7 [skip ci]
-rw-r--r--clang/include/clang/AST/OpenACCClause.h61
-rw-r--r--clang/include/clang/Sema/SemaHLSL.h1
-rw-r--r--clang/include/clang/Sema/SemaOpenACC.h16
-rw-r--r--clang/lib/AST/OpenACCClause.cpp14
-rw-r--r--clang/lib/CIR/CodeGen/Address.h8
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenCXXABI.h9
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenClass.cpp33
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp43
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenFunction.cpp8
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp28
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenTypes.cpp6
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenVTables.cpp10
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.cpp5
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp11
-rw-r--r--clang/lib/Sema/SemaHLSL.cpp34
-rw-r--r--clang/lib/Sema/SemaOpenACC.cpp8
-rw-r--r--clang/lib/Sema/SemaOpenACCClause.cpp4
-rw-r--r--clang/lib/Sema/TreeTransform.h4
-rw-r--r--clang/lib/Serialization/ASTReader.cpp2
-rw-r--r--clang/test/CIR/CodeGen/virtual-destructor-calls.cpp129
-rw-r--r--clang/test/OpenMP/target_indirect_codegen.cpp12
-rw-r--r--clang/test/OpenMP/target_parallel_num_threads_strict_messages.cpp (renamed from clang/test/OpenMP/amdgcn_parallel_num_threads_strict_messages.cpp)7
-rw-r--r--flang/include/flang/Optimizer/Builder/IntrinsicCall.h1
-rw-r--r--flang/lib/Optimizer/Builder/IntrinsicCall.cpp15
-rw-r--r--flang/lib/Optimizer/Transforms/CUFOpConversion.cpp4
-rw-r--r--flang/module/cudadevice.f909
-rw-r--r--flang/test/Fir/CUDA/cuda-data-transfer.fir40
-rw-r--r--flang/test/Lower/CUDA/cuda-device-proc.cuf12
-rw-r--r--libc/src/string/memory_utils/op_aarch64.h18
-rw-r--r--lld/COFF/DLL.cpp30
-rw-r--r--lld/test/COFF/arm64-delayimport.yaml46
-rw-r--r--lld/test/COFF/arm64x-delayimport.test92
-rw-r--r--lldb/bindings/python/python-typemaps.swig5
-rw-r--r--lldb/source/Target/RegisterContextUnwind.cpp32
-rw-r--r--lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py2
-rw-r--r--lldb/test/API/python_api/default-constructor/sb_filespec.py2
-rw-r--r--llvm/include/llvm/ADT/StringExtras.h6
-rw-r--r--llvm/include/llvm/Analysis/ScalarEvolution.h4
-rw-r--r--llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h74
-rw-r--r--llvm/include/llvm/Analysis/StaticDataProfileInfo.h18
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMPKinds.def2
-rw-r--r--llvm/lib/Analysis/ScalarEvolution.cpp102
-rw-r--r--llvm/lib/Analysis/StaticDataProfileInfo.cpp40
-rw-r--r--llvm/lib/CodeGen/MIRPrinter.cpp30
-rw-r--r--llvm/lib/CodeGen/StaticDataAnnotator.cpp15
-rw-r--r--llvm/lib/CodeGen/StaticDataSplitter.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp12
-rw-r--r--llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp50
-rw-r--r--llvm/lib/Transforms/Instrumentation/MemProfUse.cpp55
-rw-r--r--llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp2
-rw-r--r--llvm/test/CodeGen/AMDGPU/sched.group.classification.mir59
-rw-r--r--llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir2
-rw-r--r--llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll13
-rw-r--r--llvm/test/CodeGen/X86/global-variable-partition.ll18
-rw-r--r--llvm/test/Transforms/PGOProfile/data-access-profile.ll83
-rw-r--r--llvm/unittests/ADT/StringExtrasTest.cpp6
-rw-r--r--llvm/unittests/Analysis/ScalarEvolutionTest.cpp12
-rw-r--r--mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt4
-rw-r--r--mlir/include/mlir/Dialect/LLVMIR/LLVMDialectBytecode.td353
-rw-r--r--mlir/lib/Dialect/LLVMIR/CMakeLists.txt2
-rw-r--r--mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp3
-rw-r--r--mlir/lib/Dialect/LLVMIR/IR/LLVMDialectBytecode.cpp154
-rw-r--r--mlir/lib/Dialect/LLVMIR/IR/LLVMDialectBytecode.h27
-rw-r--r--mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp17
-rw-r--r--mlir/test/Dialect/LLVMIR/bytecode.mlir35
-rw-r--r--mlir/test/Dialect/LLVMIR/debuginfo.mlir1
-rw-r--r--mlir/test/Dialect/LLVMIR/roundtrip.mlir6
-rw-r--r--mlir/test/Dialect/XeGPU/xegpu-blocking.mlir98
68 files changed, 1636 insertions, 434 deletions
diff --git a/clang/include/clang/AST/OpenACCClause.h b/clang/include/clang/AST/OpenACCClause.h
index 613c03ce4ad6..379495c65882 100644
--- a/clang/include/clang/AST/OpenACCClause.h
+++ b/clang/include/clang/AST/OpenACCClause.h
@@ -1277,7 +1277,7 @@ public:
};
// A structure to stand in for the recipe on a reduction. RecipeDecl is the
-// 'main' declaration used for initializaiton, which is fixed.
+// 'main' declaration used for initializaiton, which is fixed.
struct OpenACCReductionRecipe {
VarDecl *AllocaDecl;
@@ -1297,45 +1297,72 @@ struct OpenACCReductionRecipe {
// -For a struct without the operator, this will be 1 element per field, which
// should be the combiner for that element.
// -For an array of any of the above, it will be the above for the element.
- llvm::SmallVector<CombinerRecipe, 1> CombinerRecipes;
+ // Note: These are necessarily stored in either Trailing Storage (when in the
+ // AST), or in a separate collection when being semantically analyzed.
+ llvm::ArrayRef<CombinerRecipe> CombinerRecipes;
OpenACCReductionRecipe(VarDecl *A, llvm::ArrayRef<CombinerRecipe> Combiners)
: AllocaDecl(A), CombinerRecipes(Combiners) {}
bool isSet() const { return AllocaDecl; }
- static OpenACCReductionRecipe Empty() {
- return OpenACCReductionRecipe(/*AllocaDecl=*/nullptr, {});
+};
+
+// A version of the above that is used for semantic analysis, at a time before
+// the OpenACCReductionClause node has been created. This one has storage for
+// the CombinerRecipe, since Trailing storage for it doesn't exist yet.
+struct OpenACCReductionRecipeWithStorage : OpenACCReductionRecipe {
+ llvm::SmallVector<CombinerRecipe, 1> CombinerRecipeStorage;
+
+ OpenACCReductionRecipeWithStorage(VarDecl *A,
+ llvm::ArrayRef<CombinerRecipe> Combiners)
+ : OpenACCReductionRecipe(A, {}), CombinerRecipeStorage(Combiners) {
+ CombinerRecipes = CombinerRecipeStorage;
+ }
+ static OpenACCReductionRecipeWithStorage Empty() {
+ return OpenACCReductionRecipeWithStorage(/*AllocaDecl=*/nullptr, {});
}
};
class OpenACCReductionClause final
: public OpenACCClauseWithVarList,
private llvm::TrailingObjects<OpenACCReductionClause, Expr *,
- OpenACCReductionRecipe> {
+ OpenACCReductionRecipe,
+ OpenACCReductionRecipe::CombinerRecipe> {
friend TrailingObjects;
OpenACCReductionOperator Op;
OpenACCReductionClause(SourceLocation BeginLoc, SourceLocation LParenLoc,
OpenACCReductionOperator Operator,
ArrayRef<Expr *> VarList,
- ArrayRef<OpenACCReductionRecipe> Recipes,
+ ArrayRef<OpenACCReductionRecipeWithStorage> Recipes,
SourceLocation EndLoc)
: OpenACCClauseWithVarList(OpenACCClauseKind::Reduction, BeginLoc,
LParenLoc, EndLoc),
Op(Operator) {
- assert(VarList.size() == Recipes.size());
+ assert(VarList.size() == Recipes.size());
setExprs(getTrailingObjects<Expr *>(VarList.size()), VarList);
- llvm::uninitialized_copy(Recipes,
- getTrailingObjects<OpenACCReductionRecipe>());
- }
-public:
- ~OpenACCReductionClause() {
- for (unsigned I = 0; I < getExprs().size(); ++I) {
- getTrailingObjects<OpenACCReductionRecipe>()[I].~OpenACCReductionRecipe();
+ // Since we're using trailing storage on this node to store the 'combiner'
+ // recipes of the Reduction Recipes (which have a 1:M relationship), we need
+ // to ensure we get the ArrayRef of each of our combiner 'correct'.
+ OpenACCReductionRecipe::CombinerRecipe *CurCombinerLoc =
+ getTrailingObjects<OpenACCReductionRecipe::CombinerRecipe>();
+ for (const auto &[Idx, R] : llvm::enumerate(Recipes)) {
+
+ // ArrayRef to the 'correct' data location in trailing storage.
+ llvm::MutableArrayRef<OpenACCReductionRecipe::CombinerRecipe>
+ NewCombiners{CurCombinerLoc, R.CombinerRecipes.size()};
+ CurCombinerLoc += R.CombinerRecipes.size();
+
+ llvm::uninitialized_copy(R.CombinerRecipes, NewCombiners.begin());
+
+ // Placement new into the correct location in trailng storage.
+ new (&getTrailingObjects<OpenACCReductionRecipe>()[Idx])
+ OpenACCReductionRecipe(R.AllocaDecl, NewCombiners);
}
}
+public:
static bool classof(const OpenACCClause *C) {
return C->getClauseKind() == OpenACCClauseKind::Reduction;
}
@@ -1353,13 +1380,17 @@ public:
static OpenACCReductionClause *
Create(const ASTContext &C, SourceLocation BeginLoc, SourceLocation LParenLoc,
OpenACCReductionOperator Operator, ArrayRef<Expr *> VarList,
- ArrayRef<OpenACCReductionRecipe> Recipes, SourceLocation EndLoc);
+ ArrayRef<OpenACCReductionRecipeWithStorage> Recipes,
+ SourceLocation EndLoc);
OpenACCReductionOperator getReductionOp() const { return Op; }
size_t numTrailingObjects(OverloadToken<Expr *>) const {
return getExprs().size();
}
+ size_t numTrailingObjects(OverloadToken<OpenACCReductionRecipe>) const {
+ return getExprs().size();
+ }
};
class OpenACCLinkClause final
diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h
index 46b088c0174b..f9d3a4ea9480 100644
--- a/clang/include/clang/Sema/SemaHLSL.h
+++ b/clang/include/clang/Sema/SemaHLSL.h
@@ -215,7 +215,6 @@ public:
bool diagnosePositionType(QualType T, const ParsedAttr &AL);
bool CanPerformScalarCast(QualType SrcTy, QualType DestTy);
- bool ContainsBitField(QualType BaseTy);
bool CanPerformElementwiseCast(Expr *Src, QualType DestType);
bool CanPerformAggregateSplatCast(Expr *Src, QualType DestType);
ExprResult ActOnOutParamExpr(ParmVarDecl *Param, Expr *Arg);
diff --git a/clang/include/clang/Sema/SemaOpenACC.h b/clang/include/clang/Sema/SemaOpenACC.h
index 16e7f1bc8301..f751e985ae0f 100644
--- a/clang/include/clang/Sema/SemaOpenACC.h
+++ b/clang/include/clang/Sema/SemaOpenACC.h
@@ -245,7 +245,7 @@ public:
OpenACCPrivateRecipe CreatePrivateInitRecipe(const Expr *VarExpr);
OpenACCFirstPrivateRecipe CreateFirstPrivateInitRecipe(const Expr *VarExpr);
- OpenACCReductionRecipe
+ OpenACCReductionRecipeWithStorage
CreateReductionInitRecipe(OpenACCReductionOperator ReductionOperator,
const Expr *VarExpr);
@@ -951,12 +951,14 @@ public:
ArrayRef<Expr *> IntExprs, SourceLocation EndLoc);
// Does the checking for a 'reduction ' clause that needs to be done in
// dependent and not dependent cases.
- OpenACCClause *CheckReductionClause(
- ArrayRef<const OpenACCClause *> ExistingClauses,
- OpenACCDirectiveKind DirectiveKind, SourceLocation BeginLoc,
- SourceLocation LParenLoc, OpenACCReductionOperator ReductionOp,
- ArrayRef<Expr *> Vars, ArrayRef<OpenACCReductionRecipe> Recipes,
- SourceLocation EndLoc);
+ OpenACCClause *
+ CheckReductionClause(ArrayRef<const OpenACCClause *> ExistingClauses,
+ OpenACCDirectiveKind DirectiveKind,
+ SourceLocation BeginLoc, SourceLocation LParenLoc,
+ OpenACCReductionOperator ReductionOp,
+ ArrayRef<Expr *> Vars,
+ ArrayRef<OpenACCReductionRecipeWithStorage> Recipes,
+ SourceLocation EndLoc);
ExprResult BuildOpenACCAsteriskSizeExpr(SourceLocation AsteriskLoc);
ExprResult ActOnOpenACCAsteriskSizeExpr(SourceLocation AsteriskLoc);
diff --git a/clang/lib/AST/OpenACCClause.cpp b/clang/lib/AST/OpenACCClause.cpp
index 6c4bc7c274ea..17c6bece44c8 100644
--- a/clang/lib/AST/OpenACCClause.cpp
+++ b/clang/lib/AST/OpenACCClause.cpp
@@ -506,11 +506,17 @@ OpenACCDeviceTypeClause *OpenACCDeviceTypeClause::Create(
OpenACCReductionClause *OpenACCReductionClause::Create(
const ASTContext &C, SourceLocation BeginLoc, SourceLocation LParenLoc,
OpenACCReductionOperator Operator, ArrayRef<Expr *> VarList,
- ArrayRef<OpenACCReductionRecipe> Recipes,
+ ArrayRef<OpenACCReductionRecipeWithStorage> Recipes,
SourceLocation EndLoc) {
- void *Mem = C.Allocate(
- OpenACCReductionClause::totalSizeToAlloc<Expr *, OpenACCReductionRecipe>(
- VarList.size(), Recipes.size()));
+ size_t NumCombiners = llvm::accumulate(
+ Recipes, 0, [](size_t Num, const OpenACCReductionRecipe &R) {
+ return Num + R.CombinerRecipes.size();
+ });
+
+ void *Mem = C.Allocate(OpenACCReductionClause::totalSizeToAlloc<
+ Expr *, OpenACCReductionRecipe,
+ OpenACCReductionRecipe::CombinerRecipe>(
+ VarList.size(), Recipes.size(), NumCombiners));
return new (Mem) OpenACCReductionClause(BeginLoc, LParenLoc, Operator,
VarList, Recipes, EndLoc);
}
diff --git a/clang/lib/CIR/CodeGen/Address.h b/clang/lib/CIR/CodeGen/Address.h
index fb74aa0f3bb0..a67cbad7033a 100644
--- a/clang/lib/CIR/CodeGen/Address.h
+++ b/clang/lib/CIR/CodeGen/Address.h
@@ -17,6 +17,7 @@
#include "mlir/IR/Value.h"
#include "clang/AST/CharUnits.h"
#include "clang/CIR/Dialect/IR/CIRTypes.h"
+#include "clang/CIR/MissingFeatures.h"
#include "llvm/ADT/PointerIntPair.h"
namespace clang::CIRGen {
@@ -90,6 +91,13 @@ public:
return getPointer();
}
+ /// Return the pointer contained in this class after authenticating it and
+ /// adding offset to it if necessary.
+ mlir::Value emitRawPointer() const {
+ assert(!cir::MissingFeatures::addressPointerAuthInfo());
+ return getBasePointer();
+ }
+
mlir::Type getType() const {
assert(mlir::cast<cir::PointerType>(
pointerAndKnownNonNull.getPointer().getType())
diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
index 06f41cd8fcfd..6d3741c41735 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
+++ b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
@@ -191,6 +191,15 @@ public:
virtual void emitVTableDefinitions(CIRGenVTables &cgvt,
const CXXRecordDecl *rd) = 0;
+ using DeleteOrMemberCallExpr =
+ llvm::PointerUnion<const CXXDeleteExpr *, const CXXMemberCallExpr *>;
+
+ virtual mlir::Value emitVirtualDestructorCall(CIRGenFunction &cgf,
+ const CXXDestructorDecl *dtor,
+ CXXDtorType dtorType,
+ Address thisAddr,
+ DeleteOrMemberCallExpr e) = 0;
+
/// Emit any tables needed to implement virtual inheritance. For Itanium,
/// this emits virtual table tables.
virtual void emitVirtualInheritanceTables(const CXXRecordDecl *rd) = 0;
diff --git a/clang/lib/CIR/CodeGen/CIRGenClass.cpp b/clang/lib/CIR/CodeGen/CIRGenClass.cpp
index 485b2c86cbc5..dd357ce69f1b 100644
--- a/clang/lib/CIR/CodeGen/CIRGenClass.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenClass.cpp
@@ -895,6 +895,26 @@ void CIRGenFunction::destroyCXXObject(CIRGenFunction &cgf, Address addr,
}
namespace {
+mlir::Value loadThisForDtorDelete(CIRGenFunction &cgf,
+ const CXXDestructorDecl *dd) {
+ if (Expr *thisArg = dd->getOperatorDeleteThisArg())
+ return cgf.emitScalarExpr(thisArg);
+ return cgf.loadCXXThis();
+}
+
+/// Call the operator delete associated with the current destructor.
+struct CallDtorDelete final : EHScopeStack::Cleanup {
+ CallDtorDelete() {}
+
+ void emit(CIRGenFunction &cgf) override {
+ const CXXDestructorDecl *dtor = cast<CXXDestructorDecl>(cgf.curFuncDecl);
+ const CXXRecordDecl *classDecl = dtor->getParent();
+ cgf.emitDeleteCall(dtor->getOperatorDelete(),
+ loadThisForDtorDelete(cgf, dtor),
+ cgf.getContext().getCanonicalTagType(classDecl));
+ }
+};
+
class DestroyField final : public EHScopeStack::Cleanup {
const FieldDecl *field;
CIRGenFunction::Destroyer *destroyer;
@@ -932,7 +952,18 @@ void CIRGenFunction::enterDtorCleanups(const CXXDestructorDecl *dd,
// The deleting-destructor phase just needs to call the appropriate
// operator delete that Sema picked up.
if (dtorType == Dtor_Deleting) {
- cgm.errorNYI(dd->getSourceRange(), "deleting destructor cleanups");
+ assert(dd->getOperatorDelete() &&
+ "operator delete missing - EnterDtorCleanups");
+ if (cxxStructorImplicitParamValue) {
+ cgm.errorNYI(dd->getSourceRange(), "deleting destructor with vtt");
+ } else {
+ if (dd->getOperatorDelete()->isDestroyingOperatorDelete()) {
+ cgm.errorNYI(dd->getSourceRange(),
+ "deleting destructor with destroying operator delete");
+ } else {
+ ehStack.pushCleanup<CallDtorDelete>(NormalAndEHCleanup);
+ }
+ }
return;
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
index 97c0944fca33..b1e9e768ff1e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
@@ -130,13 +130,11 @@ RValue CIRGenFunction::emitCXXMemberOrOperatorMemberCallExpr(
const CXXMethodDecl *calleeDecl =
devirtualizedMethod ? devirtualizedMethod : md;
const CIRGenFunctionInfo *fInfo = nullptr;
- if (isa<CXXDestructorDecl>(calleeDecl)) {
- cgm.errorNYI(ce->getSourceRange(),
- "emitCXXMemberOrOperatorMemberCallExpr: destructor call");
- return RValue::get(nullptr);
- }
-
- fInfo = &cgm.getTypes().arrangeCXXMethodDeclaration(calleeDecl);
+ if (const auto *dtor = dyn_cast<CXXDestructorDecl>(calleeDecl))
+ fInfo = &cgm.getTypes().arrangeCXXStructorDeclaration(
+ GlobalDecl(dtor, Dtor_Complete));
+ else
+ fInfo = &cgm.getTypes().arrangeCXXMethodDeclaration(calleeDecl);
cir::FuncType ty = cgm.getTypes().getFunctionType(*fInfo);
@@ -151,9 +149,34 @@ RValue CIRGenFunction::emitCXXMemberOrOperatorMemberCallExpr(
// because then we know what the type is.
bool useVirtualCall = canUseVirtualCall && !devirtualizedMethod;
- if (isa<CXXDestructorDecl>(calleeDecl)) {
- cgm.errorNYI(ce->getSourceRange(),
- "emitCXXMemberOrOperatorMemberCallExpr: destructor call");
+ if (const auto *dtor = dyn_cast<CXXDestructorDecl>(calleeDecl)) {
+ assert(ce->arg_begin() == ce->arg_end() &&
+ "Destructor shouldn't have explicit parameters");
+ assert(returnValue.isNull() && "Destructor shouldn't have return value");
+ if (useVirtualCall) {
+ cgm.getCXXABI().emitVirtualDestructorCall(*this, dtor, Dtor_Complete,
+ thisPtr.getAddress(),
+ cast<CXXMemberCallExpr>(ce));
+ } else {
+ GlobalDecl globalDecl(dtor, Dtor_Complete);
+ CIRGenCallee callee;
+ assert(!cir::MissingFeatures::appleKext());
+ if (!devirtualizedMethod) {
+ callee = CIRGenCallee::forDirect(
+ cgm.getAddrOfCXXStructor(globalDecl, fInfo, ty), globalDecl);
+ } else {
+ cgm.errorNYI(ce->getSourceRange(), "devirtualized destructor call");
+ return RValue::get(nullptr);
+ }
+
+ QualType thisTy =
+ isArrow ? base->getType()->getPointeeType() : base->getType();
+ // CIRGen does not pass CallOrInvoke here (different from OG LLVM codegen)
+ // because in practice it always null even in OG.
+ emitCXXDestructorCall(globalDecl, callee, thisPtr.getPointer(), thisTy,
+ /*implicitParam=*/nullptr,
+ /*implicitParamTy=*/QualType(), ce);
+ }
return RValue::get(nullptr);
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index 7a774e0441bb..01a43a997637 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -678,7 +678,13 @@ void CIRGenFunction::emitDestructorBody(FunctionArgList &args) {
// possible to delegate the destructor body to the complete
// destructor. Do so.
if (dtorType == Dtor_Deleting) {
- cgm.errorNYI(dtor->getSourceRange(), "deleting destructor");
+ RunCleanupsScope dtorEpilogue(*this);
+ enterDtorCleanups(dtor, Dtor_Deleting);
+ if (haveInsertPoint()) {
+ QualType thisTy = dtor->getFunctionObjectParameterType();
+ emitCXXDestructorCall(dtor, Dtor_Complete, /*forVirtualBase=*/false,
+ /*delegating=*/false, loadCXXThisAddress(), thisTy);
+ }
return;
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
index 9e490c6d054a..d30c975a8ffb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
@@ -95,7 +95,10 @@ public:
clang::GlobalDecl gd, Address thisAddr,
mlir::Type ty,
SourceLocation loc) override;
-
+ mlir::Value emitVirtualDestructorCall(CIRGenFunction &cgf,
+ const CXXDestructorDecl *dtor,
+ CXXDtorType dtorType, Address thisAddr,
+ DeleteOrMemberCallExpr e) override;
mlir::Value getVTableAddressPoint(BaseSubobject base,
const CXXRecordDecl *vtableClass) override;
mlir::Value getVTableAddressPointInStructorWithVTT(
@@ -465,6 +468,29 @@ void CIRGenItaniumCXXABI::emitVTableDefinitions(CIRGenVTables &cgvt,
}
}
+mlir::Value CIRGenItaniumCXXABI::emitVirtualDestructorCall(
+ CIRGenFunction &cgf, const CXXDestructorDecl *dtor, CXXDtorType dtorType,
+ Address thisAddr, DeleteOrMemberCallExpr expr) {
+ auto *callExpr = dyn_cast<const CXXMemberCallExpr *>(expr);
+ auto *delExpr = dyn_cast<const CXXDeleteExpr *>(expr);
+ assert((callExpr != nullptr) ^ (delExpr != nullptr));
+ assert(callExpr == nullptr || callExpr->arg_begin() == callExpr->arg_end());
+ assert(dtorType == Dtor_Deleting || dtorType == Dtor_Complete);
+
+ GlobalDecl globalDecl(dtor, dtorType);
+ const CIRGenFunctionInfo *fnInfo =
+ &cgm.getTypes().arrangeCXXStructorDeclaration(globalDecl);
+ const cir::FuncType &fnTy = cgm.getTypes().getFunctionType(*fnInfo);
+ auto callee = CIRGenCallee::forVirtual(callExpr, globalDecl, thisAddr, fnTy);
+
+ QualType thisTy =
+ callExpr ? callExpr->getObjectType() : delExpr->getDestroyedType();
+
+ cgf.emitCXXDestructorCall(globalDecl, callee, thisAddr.emitRawPointer(),
+ thisTy, nullptr, QualType(), nullptr);
+ return nullptr;
+}
+
void CIRGenItaniumCXXABI::emitVirtualInheritanceTables(
const CXXRecordDecl *rd) {
CIRGenVTables &vtables = cgm.getVTables();
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
index e65896a9ff10..2ab1ea0c8ff8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
@@ -619,10 +619,8 @@ const CIRGenFunctionInfo &CIRGenTypes::arrangeGlobalDeclaration(GlobalDecl gd) {
const auto *fd = cast<FunctionDecl>(gd.getDecl());
if (isa<CXXConstructorDecl>(gd.getDecl()) ||
- isa<CXXDestructorDecl>(gd.getDecl())) {
- cgm.errorNYI(SourceLocation(),
- "arrangeGlobalDeclaration for C++ constructor or destructor");
- }
+ isa<CXXDestructorDecl>(gd.getDecl()))
+ return arrangeCXXStructorDeclaration(gd);
return arrangeFunctionDeclaration(fd);
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenVTables.cpp b/clang/lib/CIR/CodeGen/CIRGenVTables.cpp
index 84f59773757b..36bab625c4dd 100644
--- a/clang/lib/CIR/CodeGen/CIRGenVTables.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenVTables.cpp
@@ -120,12 +120,6 @@ mlir::Attribute CIRGenVTables::getVTableComponent(
assert(!cir::MissingFeatures::vtableRelativeLayout());
switch (component.getKind()) {
- case VTableComponent::CK_CompleteDtorPointer:
- cgm.errorNYI("getVTableComponent: CompleteDtorPointer");
- return mlir::Attribute();
- case VTableComponent::CK_DeletingDtorPointer:
- cgm.errorNYI("getVTableComponent: DeletingDtorPointer");
- return mlir::Attribute();
case VTableComponent::CK_UnusedFunctionPointer:
cgm.errorNYI("getVTableComponent: UnusedFunctionPointer");
return mlir::Attribute();
@@ -148,7 +142,9 @@ mlir::Attribute CIRGenVTables::getVTableComponent(
"expected GlobalViewAttr or ConstPtrAttr");
return rtti;
- case VTableComponent::CK_FunctionPointer: {
+ case VTableComponent::CK_FunctionPointer:
+ case VTableComponent::CK_CompleteDtorPointer:
+ case VTableComponent::CK_DeletingDtorPointer: {
GlobalDecl gd = component.getGlobalDecl();
assert(!cir::MissingFeatures::cudaSupport());
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index fa946921d468..1ff2be756452 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1762,8 +1762,11 @@ void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
// access its value.
llvm::GlobalValue *Addr = GV;
if (CGM.getLangOpts().OpenMPIsTargetDevice) {
+ llvm::PointerType *FnPtrTy = llvm::PointerType::get(
+ CGM.getLLVMContext(),
+ CGM.getModule().getDataLayout().getProgramAddressSpace());
Addr = new llvm::GlobalVariable(
- CGM.getModule(), CGM.VoidPtrTy,
+ CGM.getModule(), FnPtrTy,
/*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
nullptr, llvm::GlobalValue::NotThreadLocal,
CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 3613b6a143d4..fddeba98adcc 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
// This provides a generalized class for OpenMP runtime code generation
-// specialized by GPU targets NVPTX and AMDGCN.
+// specialized by GPU targets NVPTX, AMDGCN and SPIR-V.
//
//===----------------------------------------------------------------------===//
@@ -1242,12 +1242,13 @@ void CGOpenMPRuntimeGPU::emitParallelCall(
CGBuilderTy &Bld = CGF.Builder;
llvm::Value *NumThreadsVal = NumThreads;
llvm::Function *WFn = WrapperFunctionsMap[OutlinedFn];
- llvm::Value *ID = llvm::ConstantPointerNull::get(CGM.Int8PtrTy);
- if (WFn)
- ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy);
- llvm::Type *FnPtrTy = llvm::PointerType::get(
+ llvm::PointerType *FnPtrTy = llvm::PointerType::get(
CGF.getLLVMContext(), CGM.getDataLayout().getProgramAddressSpace());
+ llvm::Value *ID = llvm::ConstantPointerNull::get(FnPtrTy);
+ if (WFn)
+ ID = Bld.CreateBitOrPointerCast(WFn, FnPtrTy);
+
llvm::Value *FnPtr = Bld.CreateBitOrPointerCast(OutlinedFn, FnPtrTy);
// Create a private scope that will globalize the arguments
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 17cb1e4f153c..72b2ac99ec53 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -3544,40 +3544,6 @@ bool SemaHLSL::CanPerformScalarCast(QualType SrcTy, QualType DestTy) {
llvm_unreachable("Unhandled scalar cast");
}
-// Detect if a type contains a bitfield. Will be removed when
-// bitfield support is added to HLSLElementwiseCast and HLSLAggregateSplatCast
-bool SemaHLSL::ContainsBitField(QualType BaseTy) {
- llvm::SmallVector<QualType, 16> WorkList;
- WorkList.push_back(BaseTy);
- while (!WorkList.empty()) {
- QualType T = WorkList.pop_back_val();
- T = T.getCanonicalType().getUnqualifiedType();
- // only check aggregate types
- if (const auto *AT = dyn_cast<ConstantArrayType>(T)) {
- WorkList.push_back(AT->getElementType());
- continue;
- }
- if (const auto *RT = dyn_cast<RecordType>(T)) {
- const RecordDecl *RD = RT->getOriginalDecl()->getDefinitionOrSelf();
- if (RD->isUnion())
- continue;
-
- const CXXRecordDecl *CXXD = dyn_cast<CXXRecordDecl>(RD);
-
- if (CXXD && CXXD->isStandardLayout())
- RD = CXXD->getStandardLayoutBaseWithFields();
-
- for (const auto *FD : RD->fields()) {
- if (FD->isBitField())
- return true;
- WorkList.push_back(FD->getType());
- }
- continue;
- }
- }
- return false;
-}
-
// Can perform an HLSL Aggregate splat cast if the Dest is an aggregate and the
// Src is a scalar or a vector of length 1
// Or if Dest is a vector and Src is a vector of length 1
diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp
index 779b6e9cb6b4..ca99834ce826 100644
--- a/clang/lib/Sema/SemaOpenACC.cpp
+++ b/clang/lib/Sema/SemaOpenACC.cpp
@@ -2883,12 +2883,12 @@ SemaOpenACC::CreateFirstPrivateInitRecipe(const Expr *VarExpr) {
return OpenACCFirstPrivateRecipe(AllocaDecl, Temporary);
}
-OpenACCReductionRecipe SemaOpenACC::CreateReductionInitRecipe(
+OpenACCReductionRecipeWithStorage SemaOpenACC::CreateReductionInitRecipe(
OpenACCReductionOperator ReductionOperator, const Expr *VarExpr) {
// We don't strip bounds here, so that we are doing our recipe init at the
// 'lowest' possible level. Codegen is going to have to do its own 'looping'.
if (!VarExpr || VarExpr->getType()->isDependentType())
- return OpenACCReductionRecipe::Empty();
+ return OpenACCReductionRecipeWithStorage::Empty();
QualType VarTy =
VarExpr->getType().getNonReferenceType().getUnqualifiedType();
@@ -2905,7 +2905,7 @@ OpenACCReductionRecipe SemaOpenACC::CreateReductionInitRecipe(
// at any of the combiners.
if (CreateReductionCombinerRecipe(VarExpr->getBeginLoc(), ReductionOperator,
VarTy, CombinerRecipes))
- return OpenACCReductionRecipe::Empty();
+ return OpenACCReductionRecipeWithStorage::Empty();
VarDecl *AllocaDecl = CreateAllocaDecl(
getASTContext(), SemaRef.getCurContext(), VarExpr->getBeginLoc(),
@@ -2956,7 +2956,7 @@ OpenACCReductionRecipe SemaOpenACC::CreateReductionInitRecipe(
AllocaDecl->setInitStyle(VarDecl::CallInit);
}
- return OpenACCReductionRecipe(AllocaDecl, CombinerRecipes);
+ return OpenACCReductionRecipeWithStorage(AllocaDecl, CombinerRecipes);
}
bool SemaOpenACC::CreateReductionCombinerRecipe(
diff --git a/clang/lib/Sema/SemaOpenACCClause.cpp b/clang/lib/Sema/SemaOpenACCClause.cpp
index 881e960e5a24..ead97816defe 100644
--- a/clang/lib/Sema/SemaOpenACCClause.cpp
+++ b/clang/lib/Sema/SemaOpenACCClause.cpp
@@ -1772,7 +1772,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitReductionClause(
}
SmallVector<Expr *> ValidVars;
- SmallVector<OpenACCReductionRecipe> Recipes;
+ SmallVector<OpenACCReductionRecipeWithStorage> Recipes;
for (Expr *Var : Clause.getVarList()) {
ExprResult Res = SemaRef.CheckReductionVar(Clause.getDirectiveKind(),
@@ -2196,7 +2196,7 @@ OpenACCClause *SemaOpenACC::CheckReductionClause(
ArrayRef<const OpenACCClause *> ExistingClauses,
OpenACCDirectiveKind DirectiveKind, SourceLocation BeginLoc,
SourceLocation LParenLoc, OpenACCReductionOperator ReductionOp,
- ArrayRef<Expr *> Vars, ArrayRef<OpenACCReductionRecipe> Recipes,
+ ArrayRef<Expr *> Vars, ArrayRef<OpenACCReductionRecipeWithStorage> Recipes,
SourceLocation EndLoc) {
if (DirectiveKind == OpenACCDirectiveKind::Loop ||
isOpenACCCombinedDirectiveKind(DirectiveKind)) {
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 940324bbc5e4..0dcfe0ecbad4 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -12374,7 +12374,7 @@ void OpenACCClauseTransform<Derived>::VisitReductionClause(
const OpenACCReductionClause &C) {
SmallVector<Expr *> TransformedVars = VisitVarList(C.getVarList());
SmallVector<Expr *> ValidVars;
- llvm::SmallVector<OpenACCReductionRecipe> Recipes;
+ llvm::SmallVector<OpenACCReductionRecipeWithStorage> Recipes;
for (const auto [Var, OrigRecipe] :
llvm::zip(TransformedVars, C.getRecipes())) {
@@ -12384,7 +12384,7 @@ void OpenACCClauseTransform<Derived>::VisitReductionClause(
ValidVars.push_back(Res.get());
if (OrigRecipe.isSet())
- Recipes.push_back(OrigRecipe);
+ Recipes.emplace_back(OrigRecipe.AllocaDecl, OrigRecipe.CombinerRecipes);
else
Recipes.push_back(Self.getSema().OpenACC().CreateReductionInitRecipe(
C.getReductionOp(), Res.get()));
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 868f0cc8b1da..8ebf909fb4df 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -13006,7 +13006,7 @@ OpenACCClause *ASTRecordReader::readOpenACCClause() {
SourceLocation LParenLoc = readSourceLocation();
OpenACCReductionOperator Op = readEnum<OpenACCReductionOperator>();
llvm::SmallVector<Expr *> VarList = readOpenACCVarList();
- llvm::SmallVector<OpenACCReductionRecipe> RecipeList;
+ llvm::SmallVector<OpenACCReductionRecipeWithStorage> RecipeList;
for (unsigned I = 0; I < VarList.size(); ++I) {
VarDecl *Recipe = readDeclAs<VarDecl>();
diff --git a/clang/test/CIR/CodeGen/virtual-destructor-calls.cpp b/clang/test/CIR/CodeGen/virtual-destructor-calls.cpp
new file mode 100644
index 000000000000..08a6b21ca91d
--- /dev/null
+++ b/clang/test/CIR/CodeGen/virtual-destructor-calls.cpp
@@ -0,0 +1,129 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -std=c++20 -mconstructor-aliases -O0 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -std=c++20 -mconstructor-aliases -O0 -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -std=c++20 -mconstructor-aliases -O0 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// TODO(cir): Try to emit base destructor as an alias at O1 or higher.
+
+// FIXME: LLVM IR dialect does not yet support function ptr globals, which precludes
+// a lot of the proper semantics for properly representing alias functions in LLVM
+// (see the note on LLVM_O1 below).
+
+struct Member {
+ ~Member();
+};
+
+struct A {
+ virtual ~A();
+};
+
+struct B : A {
+ Member m;
+ virtual ~B();
+};
+
+B::~B() { }
+
+// Aliases are inserted before the function definitions in LLVM IR
+// FIXME: These should have unnamed_addr set.
+// LLVM: @_ZN1BD1Ev = alias void (ptr), ptr @_ZN1BD2Ev
+// LLVM: @_ZN1CD1Ev = alias void (ptr), ptr @_ZN1CD2Ev
+
+// OGCG: @_ZN1BD1Ev = unnamed_addr alias void (ptr), ptr @_ZN1BD2Ev
+// OGCG: @_ZN1CD1Ev = unnamed_addr alias void (ptr), ptr @_ZN1CD2Ev
+
+
+// Base (D2) dtor for B: calls A's base dtor.
+
+// CIR: cir.func{{.*}} @_ZN1BD2Ev
+// CIR: cir.call @_ZN6MemberD1Ev
+// CIR: cir.call @_ZN1AD2Ev
+
+// LLVM: define{{.*}} void @_ZN1BD2Ev
+// LLVM: call void @_ZN6MemberD1Ev
+// LLVM: call void @_ZN1AD2Ev
+
+// OGCG: define{{.*}} @_ZN1BD2Ev
+// OGCG: call void @_ZN6MemberD1Ev
+// OGCG: call void @_ZN1AD2Ev
+
+// Complete (D1) dtor for B: just an alias because there are no virtual bases.
+
+// CIR: cir.func{{.*}} @_ZN1BD1Ev(!cir.ptr<!rec_B>) alias(@_ZN1BD2Ev)
+// This is defined above for LLVM and OGCG.
+
+// Deleting (D0) dtor for B: defers to the complete dtor but also calls operator delete.
+
+// CIR: cir.func{{.*}} @_ZN1BD0Ev
+// CIR: cir.call @_ZN1BD1Ev(%[[THIS:.*]]) nothrow : (!cir.ptr<!rec_B>) -> ()
+// CIR: %[[THIS_VOID:.*]] = cir.cast bitcast %[[THIS]] : !cir.ptr<!rec_B> -> !cir.ptr<!void>
+// CIR: %[[SIZE:.*]] = cir.const #cir.int<16>
+// CIR: cir.call @_ZdlPvm(%[[THIS_VOID]], %[[SIZE]])
+
+// LLVM: define{{.*}} void @_ZN1BD0Ev
+// LLVM: call void @_ZN1BD1Ev(ptr %[[THIS:.*]])
+// LLVM: call void @_ZdlPvm(ptr %[[THIS]], i64 16)
+
+// OGCG: define{{.*}} @_ZN1BD0Ev
+// OGCG: call void @_ZN1BD1Ev(ptr{{.*}} %[[THIS:.*]])
+// OGCG: call void @_ZdlPvm(ptr{{.*}} %[[THIS]], i64{{.*}} 16)
+
+struct C : B {
+ ~C();
+};
+
+C::~C() { }
+
+// Base (D2) dtor for C: calls B's base dtor.
+
+// CIR: cir.func{{.*}} @_ZN1CD2Ev
+// CIR: %[[B:.*]] = cir.base_class_addr %[[THIS:.*]] : !cir.ptr<!rec_C> nonnull [0] -> !cir.ptr<!rec_B>
+// CIR: cir.call @_ZN1BD2Ev(%[[B]])
+
+// LLVM: define{{.*}} void @_ZN1CD2Ev
+// LLVM: call void @_ZN1BD2Ev
+
+// OGCG: define{{.*}} @_ZN1CD2Ev
+// OGCG: call void @_ZN1BD2Ev
+
+// Complete (D1) dtor for C: just an alias because there are no virtual bases.
+
+// CIR: cir.func{{.*}} @_ZN1CD1Ev(!cir.ptr<!rec_C>) alias(@_ZN1CD2Ev)
+// This is defined above for LLVM and OGCG.
+
+
+// Deleting (D0) dtor for C: defers to the complete dtor but also calls operator delete.
+
+// CIR: cir.func{{.*}} @_ZN1CD0Ev
+// CIR: cir.call @_ZN1CD1Ev(%[[THIS:.*]]) nothrow : (!cir.ptr<!rec_C>) -> ()
+// CIR: %[[THIS_VOID:.*]] = cir.cast bitcast %[[THIS]] : !cir.ptr<!rec_C> -> !cir.ptr<!void>
+// CIR: %[[SIZE:.*]] = cir.const #cir.int<16>
+// CIR: cir.call @_ZdlPvm(%[[THIS_VOID]], %[[SIZE]])
+
+// LLVM: define{{.*}} void @_ZN1CD0Ev
+// LLVM: call void @_ZN1CD1Ev(ptr %[[THIS:.*]])
+// LLVM: call void @_ZdlPvm(ptr %[[THIS]], i64 16)
+
+// OGCG: define{{.*}} @_ZN1CD0Ev
+// OGCG: call void @_ZN1CD1Ev(ptr{{.*}} %[[THIS:.*]])
+// OGCG: call void @_ZdlPvm(ptr{{.*}} %[[THIS]], i64{{.*}} 16)
+
+namespace PR12798 {
+ // A qualified call to a base class destructor should not undergo virtual
+ // dispatch. Template instantiation used to lose the qualifier.
+ struct A { virtual ~A(); };
+ template<typename T> void f(T *p) { p->A::~A(); }
+
+ // CIR: cir.func{{.*}} @_ZN7PR127981fINS_1AEEEvPT_
+ // CIR: cir.call @_ZN7PR127981AD1Ev
+
+ // LLVM: define{{.*}} @_ZN7PR127981fINS_1AEEEvPT_
+ // LLVM: call void @_ZN7PR127981AD1Ev
+
+ // OGCG: define{{.*}} @_ZN7PR127981fINS_1AEEEvPT_
+ // OGCG: call void @_ZN7PR127981AD1Ev
+
+ template void f(A*);
+}
diff --git a/clang/test/OpenMP/target_indirect_codegen.cpp b/clang/test/OpenMP/target_indirect_codegen.cpp
index 20a36c293551..ec249dd43b93 100644
--- a/clang/test/OpenMP/target_indirect_codegen.cpp
+++ b/clang/test/OpenMP/target_indirect_codegen.cpp
@@ -4,6 +4,12 @@
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -x c++ -triple amdgcn-amd-amdhsa %s -fopenmp-is-target-device -fvisibility=protected -fopenmp-host-ir-file-path %t-host.bc -emit-pch -o %t
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -x c++ -triple amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fvisibility=protected -fopenmp-host-ir-file-path %t-host.bc -include-pch %t -o - | FileCheck %s --check-prefix=DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=spirv64-intel -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=spirv64-intel -emit-llvm-bc %s -o %t-spirv-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -x c++ -triple spirv64-intel -emit-llvm %s -fopenmp-is-target-device -fvisibility=protected -fopenmp-host-ir-file-path %t-spirv-host.bc -o - | FileCheck %s --check-prefix=DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -x c++ -triple spirv64-intel %s -fopenmp-is-target-device -fvisibility=protected -fopenmp-host-ir-file-path %t-spirv-host.bc -emit-pch -o %t
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -x c++ -triple spirv64-intel -emit-llvm %s -fopenmp-is-target-device -fvisibility=protected -fopenmp-host-ir-file-path %t-spirv-host.bc -include-pch %t -o - | FileCheck %s --check-prefix=DEVICE
+
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
@@ -19,10 +25,10 @@
// HOST: @[[BAR_ENTRY_NAME:.+]] = internal unnamed_addr constant [{{[0-9]+}} x i8] c"[[BAR_NAME:__omp_offloading_[0-9a-z]+_[0-9a-z]+_bar_l[0-9]+]]\00"
// HOST: @.offloading.entry.[[BAR_NAME]] = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 8, ptr @_ZL3barv, ptr @[[BAR_ENTRY_NAME]], i64 8, i64 0, ptr null }
//.
-// DEVICE: @[[FOO_NAME:__omp_offloading_[0-9a-z]+_[0-9a-z]+_foo_l[0-9]+]] = protected addrspace(1) constant ptr @_Z3foov
-// DEVICE: @[[BAZ_NAME:__omp_offloading_[0-9a-z]+_[0-9a-z]+_baz_l[0-9]+]] = protected addrspace(1) constant ptr @_Z3bazv
+// DEVICE: @[[FOO_NAME:__omp_offloading_[0-9a-z]+_[0-9a-z]+_foo_l[0-9]+]] = protected addrspace(1) constant {{ptr|ptr addrspace\(9\)}} @_Z3foov
+// DEVICE: @[[BAZ_NAME:__omp_offloading_[0-9a-z]+_[0-9a-z]+_baz_l[0-9]+]] = protected addrspace(1) constant {{ptr|ptr addrspace\(9\)}} @_Z3bazv
// DEVICE: @var = protected addrspace(1) global i8 0, align 1
-// DEVICE: @[[BAR_NAME:__omp_offloading_[0-9a-z]+_[0-9a-z]+_bar_l[0-9]+]] = protected addrspace(1) constant ptr @_ZL3barv
+// DEVICE: @[[BAR_NAME:__omp_offloading_[0-9a-z]+_[0-9a-z]+_bar_l[0-9]+]] = protected addrspace(1) constant {{ptr|ptr addrspace\(9\)}} @_ZL3barv
//.
void foo() { }
#pragma omp declare target to(foo) indirect
diff --git a/clang/test/OpenMP/amdgcn_parallel_num_threads_strict_messages.cpp b/clang/test/OpenMP/target_parallel_num_threads_strict_messages.cpp
index 513754b0bbad..8ceff02d4ecf 100644
--- a/clang/test/OpenMP/amdgcn_parallel_num_threads_strict_messages.cpp
+++ b/clang/test/OpenMP/target_parallel_num_threads_strict_messages.cpp
@@ -5,6 +5,13 @@
// RUN: %clang_cc1 -DF3 -verify -fopenmp -fopenmp-version=60 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host-ppc.bc
// RUN: %clang_cc1 -DF3 -DTARGET -verify -fopenmp -fopenmp-version=60 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host-ppc.bc -o /dev/null
+// RUN: %clang_cc1 -DF1 -verify -fopenmp -fopenmp-version=60 -triple x86_64-unknown-unknown -fopenmp-targets=spirv64-intel -emit-llvm-bc %s -o %t-spirv-ppc-host-ppc.bc
+// RUN: %clang_cc1 -DF1 -DTARGET -verify -fopenmp -fopenmp-version=60 -triple spirv64-intel -fopenmp-targets=spirv64-intel -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-spirv-ppc-host-ppc.bc -o /dev/null
+// RUN: %clang_cc1 -DF2 -verify -fopenmp -fopenmp-version=60 -triple x86_64-unknown-unknown -fopenmp-targets=spirv64-intel -emit-llvm-bc %s -o %t-spirv-ppc-host-ppc.bc
+// RUN: %clang_cc1 -DF2 -DTARGET -verify -fopenmp -fopenmp-version=60 -triple spirv64-intel -fopenmp-targets=spirv64-intel -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-spirv-ppc-host-ppc.bc -o /dev/null
+// RUN: %clang_cc1 -DF3 -verify -fopenmp -fopenmp-version=60 -triple x86_64-unknown-unknown -fopenmp-targets=spirv64-intel -emit-llvm-bc %s -o %t-spirv-ppc-host-ppc.bc
+// RUN: %clang_cc1 -DF3 -DTARGET -verify -fopenmp -fopenmp-version=60 -triple spirv64-intel -fopenmp-targets=spirv64-intel -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-spirv-ppc-host-ppc.bc -o /dev/null
+
#ifndef TARGET
// expected-no-diagnostics
#endif
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 2adfd6f2510d..c3cd119b9617 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -459,6 +459,7 @@ struct IntrinsicLibrary {
mlir::Value genTime(mlir::Type, llvm::ArrayRef<mlir::Value>);
void genTMABulkCommitGroup(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkG2S(llvm::ArrayRef<fir::ExtendedValue>);
+ void genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkWaitGroup(llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genTrailz(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genTransfer(mlir::Type,
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 5fe2a76128e0..e07baafcef0d 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -1027,6 +1027,10 @@ static constexpr IntrinsicHandler handlers[]{
{"dst", asAddr},
{"nbytes", asValue}}},
/*isElemental=*/false},
+ {"tma_bulk_s2g",
+ &I::genTMABulkS2G,
+ {{{"src", asAddr}, {"dst", asAddr}, {"nbytes", asValue}}},
+ /*isElemental=*/false},
{"tma_bulk_wait_group",
&I::genTMABulkWaitGroup,
{{}},
@@ -9227,6 +9231,17 @@ void IntrinsicLibrary::genTMABulkG2S(llvm::ArrayRef<fir::ExtendedValue> args) {
builder, loc, dst, src, barrier, fir::getBase(args[3]), {}, {});
}
+// TMA_BULK_S2G (CUDA)
+void IntrinsicLibrary::genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 3);
+ mlir::Value src = convertPtrToNVVMSpace(builder, loc, fir::getBase(args[0]),
+ mlir::NVVM::NVVMMemorySpace::Shared);
+ mlir::Value dst = convertPtrToNVVMSpace(builder, loc, fir::getBase(args[1]),
+ mlir::NVVM::NVVMMemorySpace::Global);
+ mlir::NVVM::CpAsyncBulkSharedCTAToGlobalOp::create(
+ builder, loc, dst, src, fir::getBase(args[2]), {}, {});
+}
+
// TMA_BULK_WAIT_GROUP (CUDA)
void IntrinsicLibrary::genTMABulkWaitGroup(
llvm::ArrayRef<fir::ExtendedValue> args) {
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 609a1fc9fb02..e5c5ba908242 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -558,6 +558,7 @@ static mlir::Value emboxSrc(mlir::PatternRewriter &rewriter,
if (srcTy.isInteger(1)) {
// i1 is not a supported type in the descriptor and it is actually coming
// from a LOGICAL constant. Use the destination type to avoid mismatch.
+ assert(dstEleTy && "expect dst element type to be set");
srcTy = dstEleTy;
src = createConvertOp(rewriter, loc, srcTy, src);
addr = builder.createTemporary(loc, srcTy);
@@ -652,7 +653,8 @@ struct CUFDataTransferOpConversion
// Initialization of an array from a scalar value should be implemented
// via a kernel launch. Use the flang runtime via the Assign function
// until we have more infrastructure.
- mlir::Value src = emboxSrc(rewriter, op, symtab);
+ mlir::Type dstEleTy = fir::unwrapInnerType(fir::unwrapRefType(dstTy));
+ mlir::Value src = emboxSrc(rewriter, op, symtab, dstEleTy);
mlir::Value dst = emboxDst(rewriter, op, symtab);
mlir::func::FuncOp func =
fir::runtime::getRuntimeFunc<mkRTKey(CUFDataTransferCstDesc)>(
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
index a8b9aa8b57ef..22df9cdf410d 100644
--- a/flang/module/cudadevice.f90
+++ b/flang/module/cudadevice.f90
@@ -2034,6 +2034,15 @@ implicit none
end subroutine
end interface
+ interface
+ attributes(device) subroutine tma_bulk_s2g(src, dst, nbytes)
+ !dir$ ignore_tkr src, dst
+ integer(4), shared :: src(*)
+ integer(4), device :: dst(*)
+ integer(4), value :: nbytes
+ end subroutine
+ end interface
+
contains
attributes(device) subroutine syncthreads()
diff --git a/flang/test/Fir/CUDA/cuda-data-transfer.fir b/flang/test/Fir/CUDA/cuda-data-transfer.fir
index 669300cf6473..5d3215dd07fc 100644
--- a/flang/test/Fir/CUDA/cuda-data-transfer.fir
+++ b/flang/test/Fir/CUDA/cuda-data-transfer.fir
@@ -651,5 +651,45 @@ func.func @_QPsub28() {
// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DESC]] : (!fir.ref<!fir.box<!fir.logical<8>>>) -> !fir.ref<!fir.box<none>>
// CHECK: fir.call @_FortranACUFDataTransferCstDesc(%{{.*}}, %[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> ()
+func.func @_QPtesti4(%arg0: !fir.ref<i32> {fir.bindc_name = "n1"}, %arg1: !fir.ref<i32> {fir.bindc_name = "n2"}, %arg2: !fir.ref<i32> {fir.bindc_name = "n3"}, %arg3: !fir.ref<i32> {fir.bindc_name = "n4"}) {
+ %true = arith.constant true
+ %c0 = arith.constant 0 : index
+ %c2_i32 = arith.constant 2 : i32
+ %0 = fir.dummy_scope : !fir.dscope
+ %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtesti4En1"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %2:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtesti4En2"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %3:2 = hlfir.declare %arg2 dummy_scope %0 {uniq_name = "_QFtesti4En3"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %4:2 = hlfir.declare %arg3 dummy_scope %0 {uniq_name = "_QFtesti4En4"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %5 = fir.load %1#0 : !fir.ref<i32>
+ %6 = arith.divsi %5, %c2_i32 : i32
+ %7 = fir.convert %6 : (i32) -> index
+ %8 = arith.cmpi sgt, %7, %c0 : index
+ %9 = arith.select %8, %7, %c0 : index
+ %10 = fir.load %2#0 : !fir.ref<i32>
+ %11 = arith.divsi %10, %c2_i32 : i32
+ %12 = fir.convert %11 : (i32) -> index
+ %13 = arith.cmpi sgt, %12, %c0 : index
+ %14 = arith.select %13, %12, %c0 : index
+ %15 = fir.load %3#0 : !fir.ref<i32>
+ %16 = arith.divsi %15, %c2_i32 : i32
+ %17 = fir.convert %16 : (i32) -> index
+ %18 = arith.cmpi sgt, %17, %c0 : index
+ %19 = arith.select %18, %17, %c0 : index
+ %20 = fir.load %4#0 : !fir.ref<i32>
+ %21 = arith.divsi %20, %c2_i32 : i32
+ %22 = fir.convert %21 : (i32) -> index
+ %23 = arith.cmpi sgt, %22, %c0 : index
+ %24 = arith.select %23, %22, %c0 : index
+ %25 = cuf.alloc !fir.array<?x?x?x?x!fir.logical<4>>, %9, %14, %19, %24 : index, index, index, index {bindc_name = "lma", data_attr = #cuf.cuda<managed>, uniq_name = "_QFtesti4Elma"} -> !fir.ref<!fir.array<?x?x?x?x!fir.logical<4>>>
+ %26 = fir.shape %9, %14, %19, %24 : (index, index, index, index) -> !fir.shape<4>
+ %27:2 = hlfir.declare %25(%26) {data_attr = #cuf.cuda<managed>, uniq_name = "_QFtesti4Elma"} : (!fir.ref<!fir.array<?x?x?x?x!fir.logical<4>>>, !fir.shape<4>) -> (!fir.box<!fir.array<?x?x?x?x!fir.logical<4>>>, !fir.ref<!fir.array<?x?x?x?x!fir.logical<4>>>)
+ cuf.data_transfer %true to %27#1, %26 : !fir.shape<4> {transfer_kind = #cuf.cuda_transfer<host_device>} : i1, !fir.ref<!fir.array<?x?x?x?x!fir.logical<4>>>
+ cuf.free %27#1 : !fir.ref<!fir.array<?x?x?x?x!fir.logical<4>>> {data_attr = #cuf.cuda<managed>}
+ return
+}
+
+// CHECK-LABEL: func.func @_QPtesti4
+// CHECK: fir.call @_FortranACUFDataTransferCstDesc
+
} // end of module
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 83ee0118638b..29c348c5260a 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -438,7 +438,7 @@ end subroutine
! CHECK: nvvm.cp.async.bulk.commit.group
! CHECK: nvvm.cp.async.bulk.wait_group 0
-attributes(global) subroutine test_bulk_g2s(c, a, b, n)
+attributes(global) subroutine test_bulk_g2s(a)
real(8), device :: a(*)
real(8), shared :: tmpa(1024)
integer(8), shared :: barrier1
@@ -448,3 +448,13 @@ end subroutine
! CHECK-LABEL: func.func @_QPtest_bulk_g2s
! CHECK: nvvm.cp.async.bulk.shared.cluster.global %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : <7>, <1>
+
+attributes(global) subroutine test_bulk_s2g(a)
+ real(8), device :: a(*)
+ real(8), shared :: tmpa(1024)
+ integer(4) :: tx_count
+ call tma_bulk_s2g(tmpa, a(j), tx_count)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_bulk_s2g
+! CHECL: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3>
diff --git a/libc/src/string/memory_utils/op_aarch64.h b/libc/src/string/memory_utils/op_aarch64.h
index e552601fbb70..b5c3bb74e741 100644
--- a/libc/src/string/memory_utils/op_aarch64.h
+++ b/libc/src/string/memory_utils/op_aarch64.h
@@ -84,8 +84,7 @@ template <size_t Size> struct Bcmp {
uint8x16_t a = vld1q_u8(_p1);
uint8x16_t n = vld1q_u8(_p2);
uint8x16_t an = veorq_u8(a, n);
- uint32x2_t an_reduced = vqmovn_u64(vreinterpretq_u64_u8(an));
- return vmaxv_u32(an_reduced);
+ return vmaxvq_u32(vreinterpretq_u32_u8(an));
} else if constexpr (Size == 32) {
auto _p1 = as_u8(p1);
auto _p2 = as_u8(p2);
@@ -97,12 +96,9 @@ template <size_t Size> struct Bcmp {
uint8x16_t bo = veorq_u8(b, o);
// anbo = (a ^ n) | (b ^ o). At least one byte is nonzero if there is
// a difference between the two buffers. We reduce this value down to 4
- // bytes in two steps. First, calculate the saturated move value when
- // going from 2x64b to 2x32b. Second, compute the max of the 2x32b to get
- // a single 32 bit nonzero value if a mismatch occurred.
+ // bytes using the UMAXV instruction to compute the max across the vector.
uint8x16_t anbo = vorrq_u8(an, bo);
- uint32x2_t anbo_reduced = vqmovn_u64(vreinterpretq_u64_u8(anbo));
- return vmaxv_u32(anbo_reduced);
+ return vmaxvq_u32(vreinterpretq_u32_u8(anbo));
} else if constexpr ((Size % BlockSize) == 0) {
for (size_t offset = 0; offset < Size; offset += BlockSize)
if (auto value = Bcmp<BlockSize>::block(p1 + offset, p2 + offset))
@@ -129,8 +125,7 @@ template <size_t Size> struct Bcmp {
uint8x16_t bo = veorq_u8(b, o);
// anbo = (a ^ n) | (b ^ o)
uint8x16_t anbo = vorrq_u8(an, bo);
- uint32x2_t anbo_reduced = vqmovn_u64(vreinterpretq_u64_u8(anbo));
- return vmaxv_u32(anbo_reduced);
+ return vmaxvq_u32(vreinterpretq_u32_u8(anbo));
} else if constexpr (Size == 32) {
auto _p1 = as_u8(p1);
auto _p2 = as_u8(p2);
@@ -150,9 +145,8 @@ template <size_t Size> struct Bcmp {
uint8x16_t cpdq = vorrq_u8(cp, dq);
// abnocpdq = ((a ^ n) | (b ^ o)) | ((c ^ p) | (d ^ q)). Reduce this to
// a nonzero 32 bit value if a mismatch occurred.
- uint64x2_t abnocpdq = vreinterpretq_u64_u8(anbo | cpdq);
- uint32x2_t abnocpdq_reduced = vqmovn_u64(abnocpdq);
- return vmaxv_u32(abnocpdq_reduced);
+ uint8x16_t abnocpdq = anbo | cpdq;
+ return vmaxvq_u32(vreinterpretq_u32_u8(abnocpdq));
} else {
static_assert(cpp::always_false<decltype(Size)>, "SIZE not implemented");
}
diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp
index f4284efee8d4..10bc898244a4 100644
--- a/lld/COFF/DLL.cpp
+++ b/lld/COFF/DLL.cpp
@@ -320,16 +320,17 @@ static const uint8_t thunkARM64[] = {
};
static const uint8_t tailMergeARM64[] = {
- 0xfd, 0x7b, 0xb3, 0xa9, // stp x29, x30, [sp, #-208]!
+ 0xfd, 0x7b, 0xb2, 0xa9, // stp x29, x30, [sp, #-224]!
0xfd, 0x03, 0x00, 0x91, // mov x29, sp
0xe0, 0x07, 0x01, 0xa9, // stp x0, x1, [sp, #16]
0xe2, 0x0f, 0x02, 0xa9, // stp x2, x3, [sp, #32]
0xe4, 0x17, 0x03, 0xa9, // stp x4, x5, [sp, #48]
0xe6, 0x1f, 0x04, 0xa9, // stp x6, x7, [sp, #64]
- 0xe0, 0x87, 0x02, 0xad, // stp q0, q1, [sp, #80]
- 0xe2, 0x8f, 0x03, 0xad, // stp q2, q3, [sp, #112]
- 0xe4, 0x97, 0x04, 0xad, // stp q4, q5, [sp, #144]
- 0xe6, 0x9f, 0x05, 0xad, // stp q6, q7, [sp, #176]
+ 0xe8, 0x2b, 0x00, 0xf9, // str x8, [sp, #80]
+ 0xe0, 0x07, 0x03, 0xad, // stp q0, q1, [sp, #96]
+ 0xe2, 0x0f, 0x04, 0xad, // stp q2, q3, [sp, #128]
+ 0xe4, 0x17, 0x05, 0xad, // stp q4, q5, [sp, #160]
+ 0xe6, 0x1f, 0x06, 0xad, // stp q6, q7, [sp, #192]
0xe1, 0x03, 0x11, 0xaa, // mov x1, x17
0x00, 0x00, 0x00, 0x90, // adrp x0, #0 DELAY_IMPORT_DESCRIPTOR
0x00, 0x00, 0x00, 0x91, // add x0, x0, #0 :lo12:DELAY_IMPORT_DESCRIPTOR
@@ -337,15 +338,16 @@ static const uint8_t tailMergeARM64[] = {
0x42, 0x00, 0x00, 0x91, // add x2, x2, #0 :lo12:__delayLoadHelper2
0x40, 0x00, 0x3f, 0xd6, // blr x2
0xf0, 0x03, 0x00, 0xaa, // mov x16, x0
- 0xe6, 0x9f, 0x45, 0xad, // ldp q6, q7, [sp, #176]
- 0xe4, 0x97, 0x44, 0xad, // ldp q4, q5, [sp, #144]
- 0xe2, 0x8f, 0x43, 0xad, // ldp q2, q3, [sp, #112]
- 0xe0, 0x87, 0x42, 0xad, // ldp q0, q1, [sp, #80]
+ 0xe6, 0x1f, 0x46, 0xad, // ldp q6, q7, [sp, #192]
+ 0xe4, 0x17, 0x45, 0xad, // ldp q4, q5, [sp, #160]
+ 0xe2, 0x0f, 0x44, 0xad, // ldp q2, q3, [sp, #128]
+ 0xe0, 0x07, 0x43, 0xad, // ldp q0, q1, [sp, #96]
+ 0xe8, 0x2b, 0x40, 0xf9, // ldr x8, [sp, #80]
0xe6, 0x1f, 0x44, 0xa9, // ldp x6, x7, [sp, #64]
0xe4, 0x17, 0x43, 0xa9, // ldp x4, x5, [sp, #48]
0xe2, 0x0f, 0x42, 0xa9, // ldp x2, x3, [sp, #32]
0xe0, 0x07, 0x41, 0xa9, // ldp x0, x1, [sp, #16]
- 0xfd, 0x7b, 0xcd, 0xa8, // ldp x29, x30, [sp], #208
+ 0xfd, 0x7b, 0xce, 0xa8, // ldp x29, x30, [sp], #224
0x00, 0x02, 0x1f, 0xd6, // br x16
};
@@ -556,11 +558,11 @@ public:
void writeTo(uint8_t *buf) const override {
memcpy(buf, tailMergeARM64, sizeof(tailMergeARM64));
- applyArm64Addr(buf + 44, desc->getRVA(), rva + 44, 12);
- applyArm64Imm(buf + 48, desc->getRVA() & 0xfff, 0);
+ applyArm64Addr(buf + 48, desc->getRVA(), rva + 48, 12);
+ applyArm64Imm(buf + 52, desc->getRVA() & 0xfff, 0);
if (helper) {
- applyArm64Addr(buf + 52, helper->getRVA(), rva + 52, 12);
- applyArm64Imm(buf + 56, helper->getRVA() & 0xfff, 0);
+ applyArm64Addr(buf + 56, helper->getRVA(), rva + 56, 12);
+ applyArm64Imm(buf + 60, helper->getRVA() & 0xfff, 0);
}
}
diff --git a/lld/test/COFF/arm64-delayimport.yaml b/lld/test/COFF/arm64-delayimport.yaml
index 7090206dea38..5d26978db8be 100644
--- a/lld/test/COFF/arm64-delayimport.yaml
+++ b/lld/test/COFF/arm64-delayimport.yaml
@@ -8,33 +8,35 @@
# DISASM: 140001014: d0000011 adrp x17, 0x140003000
# DISASM: 140001018: 91002231 add x17, x17, #8
# DISASM: 14000101c: 14000001 b 0x140001020 <.text+0x20>
-# DISASM: 140001020: a9b37bfd stp x29, x30, [sp, #-208]!
+# DISASM: 140001020: a9b27bfd stp x29, x30, [sp, #-224]!
# DISASM: 140001024: 910003fd mov x29, sp
# DISASM: 140001028: a90107e0 stp x0, x1, [sp, #16]
# DISASM: 14000102c: a9020fe2 stp x2, x3, [sp, #32]
# DISASM: 140001030: a90317e4 stp x4, x5, [sp, #48]
# DISASM: 140001034: a9041fe6 stp x6, x7, [sp, #64]
-# DISASM: 140001038: ad0287e0 stp q0, q1, [sp, #80]
-# DISASM: 14000103c: ad038fe2 stp q2, q3, [sp, #112]
-# DISASM: 140001040: ad0497e4 stp q4, q5, [sp, #144]
-# DISASM: 140001044: ad059fe6 stp q6, q7, [sp, #176]
-# DISASM: 140001048: aa1103e1 mov x1, x17
-# DISASM: 14000104c: b0000000 adrp x0, 0x140002000
-# DISASM: 140001050: 91000000 add x0, x0, #0
-# DISASM: 140001054: 90000002 adrp x2, 0x140001000 <.text>
-# DISASM: 140001058: 91000042 add x2, x2, #0
-# DISASM: 14000105c: d63f0040 blr x2
-# DISASM: 140001060: aa0003f0 mov x16, x0
-# DISASM: 140001064: ad459fe6 ldp q6, q7, [sp, #176]
-# DISASM: 140001068: ad4497e4 ldp q4, q5, [sp, #144]
-# DISASM: 14000106c: ad438fe2 ldp q2, q3, [sp, #112]
-# DISASM: 140001070: ad4287e0 ldp q0, q1, [sp, #80]
-# DISASM: 140001074: a9441fe6 ldp x6, x7, [sp, #64]
-# DISASM: 140001078: a94317e4 ldp x4, x5, [sp, #48]
-# DISASM: 14000107c: a9420fe2 ldp x2, x3, [sp, #32]
-# DISASM: 140001080: a94107e0 ldp x0, x1, [sp, #16]
-# DISASM: 140001084: a8cd7bfd ldp x29, x30, [sp], #208
-# DISASM: 140001088: d61f0200 br x16
+# DISASM: 140001038: f9002be8 str x8, [sp, #80]
+# DISASM: 14000103c: ad0307e0 stp q0, q1, [sp, #96]
+# DISASM: 140001040: ad040fe2 stp q2, q3, [sp, #128]
+# DISASM: 140001044: ad0517e4 stp q4, q5, [sp, #160]
+# DISASM: 140001048: ad061fe6 stp q6, q7, [sp, #192]
+# DISASM: 14000104c: aa1103e1 mov x1, x17
+# DISASM: 140001050: b0000000 adrp x0, 0x140002000
+# DISASM: 140001054: 91000000 add x0, x0, #0
+# DISASM: 140001058: 90000002 adrp x2, 0x140001000 <.text>
+# DISASM: 14000105c: 91000042 add x2, x2, #0
+# DISASM: 140001060: d63f0040 blr x2
+# DISASM: 140001064: aa0003f0 mov x16, x0
+# DISASM: 140001068: ad461fe6 ldp q6, q7, [sp, #192]
+# DISASM: 14000106c: ad4517e4 ldp q4, q5, [sp, #160]
+# DISASM: 140001070: ad440fe2 ldp q2, q3, [sp, #128]
+# DISASM: 140001074: ad4307e0 ldp q0, q1, [sp, #96]
+# DISASM: 140001078: f9402be8 ldr x8, [sp, #80]
+# DISASM: 14000107c: a9441fe6 ldp x6, x7, [sp, #64]
+# DISASM: 140001080: a94317e4 ldp x4, x5, [sp, #48]
+# DISASM: 140001084: a9420fe2 ldp x2, x3, [sp, #32]
+# DISASM: 140001088: a94107e0 ldp x0, x1, [sp, #16]
+# DISASM: 14000108c: a8ce7bfd ldp x29, x30, [sp], #224
+# DISASM: 140001090: d61f0200 br x16
# IMPORTS: Format: COFF-ARM64
# IMPORTS: Arch: aarch64
diff --git a/lld/test/COFF/arm64x-delayimport.test b/lld/test/COFF/arm64x-delayimport.test
index e22cc6d5c42f..e705fb0efc45 100644
--- a/lld/test/COFF/arm64x-delayimport.test
+++ b/lld/test/COFF/arm64x-delayimport.test
@@ -61,33 +61,35 @@ DISASM-NEXT: 180001010: d61f0200 br x16
DISASM-NEXT: 180001014: b0000031 adrp x17, 0x180006000
DISASM-NEXT: 180001018: 91022231 add x17, x17, #0x88
DISASM-NEXT: 18000101c: 14000001 b 0x180001020 <.text+0x20>
-DISASM-NEXT: 180001020: a9b37bfd stp x29, x30, [sp, #-0xd0]!
+DISASM-NEXT: 180001020: a9b27bfd stp x29, x30, [sp, #-0xe0]!
DISASM-NEXT: 180001024: 910003fd mov x29, sp
DISASM-NEXT: 180001028: a90107e0 stp x0, x1, [sp, #0x10]
DISASM-NEXT: 18000102c: a9020fe2 stp x2, x3, [sp, #0x20]
DISASM-NEXT: 180001030: a90317e4 stp x4, x5, [sp, #0x30]
DISASM-NEXT: 180001034: a9041fe6 stp x6, x7, [sp, #0x40]
-DISASM-NEXT: 180001038: ad0287e0 stp q0, q1, [sp, #0x50]
-DISASM-NEXT: 18000103c: ad038fe2 stp q2, q3, [sp, #0x70]
-DISASM-NEXT: 180001040: ad0497e4 stp q4, q5, [sp, #0x90]
-DISASM-NEXT: 180001044: ad059fe6 stp q6, q7, [sp, #0xb0]
-DISASM-NEXT: 180001048: aa1103e1 mov x1, x17
-DISASM-NEXT: 18000104c: f0000000 adrp x0, 0x180004000
-DISASM-NEXT: 180001050: 910d2000 add x0, x0, #0x348
-DISASM-NEXT: 180001054: 90000002 adrp x2, 0x180001000 <.text>
-DISASM-NEXT: 180001058: 91000042 add x2, x2, #0x0
-DISASM-NEXT: 18000105c: d63f0040 blr x2
-DISASM-NEXT: 180001060: aa0003f0 mov x16, x0
-DISASM-NEXT: 180001064: ad459fe6 ldp q6, q7, [sp, #0xb0]
-DISASM-NEXT: 180001068: ad4497e4 ldp q4, q5, [sp, #0x90]
-DISASM-NEXT: 18000106c: ad438fe2 ldp q2, q3, [sp, #0x70]
-DISASM-NEXT: 180001070: ad4287e0 ldp q0, q1, [sp, #0x50]
-DISASM-NEXT: 180001074: a9441fe6 ldp x6, x7, [sp, #0x40]
-DISASM-NEXT: 180001078: a94317e4 ldp x4, x5, [sp, #0x30]
-DISASM-NEXT: 18000107c: a9420fe2 ldp x2, x3, [sp, #0x20]
-DISASM-NEXT: 180001080: a94107e0 ldp x0, x1, [sp, #0x10]
-DISASM-NEXT: 180001084: a8cd7bfd ldp x29, x30, [sp], #0xd0
-DISASM-NEXT: 180001088: d61f0200 br x16
+DISASM-NEXT: 180001038: f9002be8 str x8, [sp, #0x50]
+DISASM-NEXT: 18000103c: ad0307e0 stp q0, q1, [sp, #0x60]
+DISASM-NEXT: 180001040: ad040fe2 stp q2, q3, [sp, #0x80]
+DISASM-NEXT: 180001044: ad0517e4 stp q4, q5, [sp, #0xa0]
+DISASM-NEXT: 180001048: ad061fe6 stp q6, q7, [sp, #0xc0]
+DISASM-NEXT: 18000104c: aa1103e1 mov x1, x17
+DISASM-NEXT: 180001050: f0000000 adrp x0, 0x180004000
+DISASM-NEXT: 180001054: 910d2000 add x0, x0, #0x348
+DISASM-NEXT: 180001058: 90000002 adrp x2, 0x180001000 <.text>
+DISASM-NEXT: 18000105c: 91000042 add x2, x2, #0x0
+DISASM-NEXT: 180001060: d63f0040 blr x2
+DISASM-NEXT: 180001064: aa0003f0 mov x16, x0
+DISASM-NEXT: 180001068: ad461fe6 ldp q6, q7, [sp, #0xc0]
+DISASM-NEXT: 18000106c: ad4517e4 ldp q4, q5, [sp, #0xa0]
+DISASM-NEXT: 180001070: ad440fe2 ldp q2, q3, [sp, #0x80]
+DISASM-NEXT: 180001074: ad4307e0 ldp q0, q1, [sp, #0x60]
+DISASM-NEXT: 180001078: f9402be8 ldr x8, [sp, #0x50]
+DISASM-NEXT: 18000107c: a9441fe6 ldp x6, x7, [sp, #0x40]
+DISASM-NEXT: 180001080: a94317e4 ldp x4, x5, [sp, #0x30]
+DISASM-NEXT: 180001084: a9420fe2 ldp x2, x3, [sp, #0x20]
+DISASM-NEXT: 180001088: a94107e0 ldp x0, x1, [sp, #0x10]
+DISASM-NEXT: 18000108c: a8ce7bfd ldp x29, x30, [sp], #0xe0
+DISASM-NEXT: 180001090: d61f0200 br x16
DISASM-NEXT: ...
DISASM-NEXT: 180002000: 52800040 mov w0, #0x2 // =2
DISASM-NEXT: 180002004: d65f03c0 ret
@@ -186,33 +188,35 @@ NATIVE-DISASM-NEXT: 180001010: d61f0200 br x16
NATIVE-DISASM-NEXT: 180001014: 90000031 adrp x17, 0x180005000
NATIVE-DISASM-NEXT: 180001018: 91022231 add x17, x17, #0x88
NATIVE-DISASM-NEXT: 18000101c: 14000001 b 0x180001020 <.text+0x20>
-NATIVE-DISASM-NEXT: 180001020: a9b37bfd stp x29, x30, [sp, #-0xd0]!
+NATIVE-DISASM-NEXT: 180001020: a9b27bfd stp x29, x30, [sp, #-0xe0]!
NATIVE-DISASM-NEXT: 180001024: 910003fd mov x29, sp
NATIVE-DISASM-NEXT: 180001028: a90107e0 stp x0, x1, [sp, #0x10]
NATIVE-DISASM-NEXT: 18000102c: a9020fe2 stp x2, x3, [sp, #0x20]
NATIVE-DISASM-NEXT: 180001030: a90317e4 stp x4, x5, [sp, #0x30]
NATIVE-DISASM-NEXT: 180001034: a9041fe6 stp x6, x7, [sp, #0x40]
-NATIVE-DISASM-NEXT: 180001038: ad0287e0 stp q0, q1, [sp, #0x50]
-NATIVE-DISASM-NEXT: 18000103c: ad038fe2 stp q2, q3, [sp, #0x70]
-NATIVE-DISASM-NEXT: 180001040: ad0497e4 stp q4, q5, [sp, #0x90]
-NATIVE-DISASM-NEXT: 180001044: ad059fe6 stp q6, q7, [sp, #0xb0]
-NATIVE-DISASM-NEXT: 180001048: aa1103e1 mov x1, x17
-NATIVE-DISASM-NEXT: 18000104c: d0000000 adrp x0, 0x180003000
-NATIVE-DISASM-NEXT: 180001050: 910cc000 add x0, x0, #0x330
-NATIVE-DISASM-NEXT: 180001054: 90000002 adrp x2, 0x180001000 <.text>
-NATIVE-DISASM-NEXT: 180001058: 91000042 add x2, x2, #0x0
-NATIVE-DISASM-NEXT: 18000105c: d63f0040 blr x2
-NATIVE-DISASM-NEXT: 180001060: aa0003f0 mov x16, x0
-NATIVE-DISASM-NEXT: 180001064: ad459fe6 ldp q6, q7, [sp, #0xb0]
-NATIVE-DISASM-NEXT: 180001068: ad4497e4 ldp q4, q5, [sp, #0x90]
-NATIVE-DISASM-NEXT: 18000106c: ad438fe2 ldp q2, q3, [sp, #0x70]
-NATIVE-DISASM-NEXT: 180001070: ad4287e0 ldp q0, q1, [sp, #0x50]
-NATIVE-DISASM-NEXT: 180001074: a9441fe6 ldp x6, x7, [sp, #0x40]
-NATIVE-DISASM-NEXT: 180001078: a94317e4 ldp x4, x5, [sp, #0x30]
-NATIVE-DISASM-NEXT: 18000107c: a9420fe2 ldp x2, x3, [sp, #0x20]
-NATIVE-DISASM-NEXT: 180001080: a94107e0 ldp x0, x1, [sp, #0x10]
-NATIVE-DISASM-NEXT: 180001084: a8cd7bfd ldp x29, x30, [sp], #0xd0
-NATIVE-DISASM-NEXT: 180001088: d61f0200 br x16
+NATIVE-DISASM-NEXT: 180001038: f9002be8 str x8, [sp, #0x50]
+NATIVE-DISASM-NEXT: 18000103c: ad0307e0 stp q0, q1, [sp, #0x60]
+NATIVE-DISASM-NEXT: 180001040: ad040fe2 stp q2, q3, [sp, #0x80]
+NATIVE-DISASM-NEXT: 180001044: ad0517e4 stp q4, q5, [sp, #0xa0]
+NATIVE-DISASM-NEXT: 180001048: ad061fe6 stp q6, q7, [sp, #0xc0]
+NATIVE-DISASM-NEXT: 18000104c: aa1103e1 mov x1, x17
+NATIVE-DISASM-NEXT: 180001050: d0000000 adrp x0, 0x180003000
+NATIVE-DISASM-NEXT: 180001054: 910cc000 add x0, x0, #0x330
+NATIVE-DISASM-NEXT: 180001058: 90000002 adrp x2, 0x180001000 <.text>
+NATIVE-DISASM-NEXT: 18000105c: 91000042 add x2, x2, #0x0
+NATIVE-DISASM-NEXT: 180001060: d63f0040 blr x2
+NATIVE-DISASM-NEXT: 180001064: aa0003f0 mov x16, x0
+NATIVE-DISASM-NEXT: 180001068: ad461fe6 ldp q6, q7, [sp, #0xc0]
+NATIVE-DISASM-NEXT: 18000106c: ad4517e4 ldp q4, q5, [sp, #0xa0]
+NATIVE-DISASM-NEXT: 180001070: ad440fe2 ldp q2, q3, [sp, #0x80]
+NATIVE-DISASM-NEXT: 180001074: ad4307e0 ldp q0, q1, [sp, #0x60]
+NATIVE-DISASM-NEXT: 180001078: f9402be8 ldr x8, [sp, #0x50]
+NATIVE-DISASM-NEXT: 18000107c: a9441fe6 ldp x6, x7, [sp, #0x40]
+NATIVE-DISASM-NEXT: 180001080: a94317e4 ldp x4, x5, [sp, #0x30]
+NATIVE-DISASM-NEXT: 180001084: a9420fe2 ldp x2, x3, [sp, #0x20]
+NATIVE-DISASM-NEXT: 180001088: a94107e0 ldp x0, x1, [sp, #0x10]
+NATIVE-DISASM-NEXT: 18000108c: a8ce7bfd ldp x29, x30, [sp], #0xe0
+NATIVE-DISASM-NEXT: 180001090: d61f0200 br x16
RUN: llvm-readobj --coff-load-config out-native.dll | FileCheck --check-prefix=NATIVE-LOADCFG %s
NATIVE-LOADCFG: AuxiliaryDelayloadIAT: 0x4000
diff --git a/lldb/bindings/python/python-typemaps.swig b/lldb/bindings/python/python-typemaps.swig
index 88b6cd9ef6b6..715914fe745f 100644
--- a/lldb/bindings/python/python-typemaps.swig
+++ b/lldb/bindings/python/python-typemaps.swig
@@ -233,6 +233,11 @@ AND call SWIG_fail at the same time, because it will result in a double free.
}
+// For lldb::SBFileSpec::GetPath
+%typemap(in) (char *dst_path, size_t dst_len) = (char *dst_or_null, size_t dst_len);
+%typemap(argout) (char *dst_path, size_t dst_len) = (char *dst_or_null, size_t dst_len);
+
+
// typemap for an outgoing buffer
// See also SBEvent::SBEvent(uint32_t event, const char *cstr, uint32_t cstr_len).
// Ditto for SBProcess::PutSTDIN(const char *src, size_t src_len).
diff --git a/lldb/source/Target/RegisterContextUnwind.cpp b/lldb/source/Target/RegisterContextUnwind.cpp
index c6d15fc6be0a..252bee2b5d72 100644
--- a/lldb/source/Target/RegisterContextUnwind.cpp
+++ b/lldb/source/Target/RegisterContextUnwind.cpp
@@ -52,6 +52,14 @@ static ConstString GetSymbolOrFunctionName(const SymbolContext &sym_ctx) {
return ConstString();
}
+static bool CallFrameAddressIsValid(ABISP abi_sp, lldb::addr_t cfa) {
+ if (cfa == LLDB_INVALID_ADDRESS)
+ return false;
+ if (abi_sp)
+ return abi_sp->CallFrameAddressIsValid(cfa);
+ return cfa != 0 && cfa != 1;
+}
+
RegisterContextUnwind::RegisterContextUnwind(Thread &thread,
const SharedPtr &next_frame,
SymbolContext &sym_ctx,
@@ -448,7 +456,7 @@ void RegisterContextUnwind::InitializeNonZerothFrame() {
ReadFrameAddress(row_register_kind, row->GetAFAValue(), m_afa);
// A couple of sanity checks..
- if (m_cfa == LLDB_INVALID_ADDRESS || m_cfa == 0 || m_cfa == 1) {
+ if (!CallFrameAddressIsValid(abi_sp, m_cfa)) {
UnwindLogMsg("could not find a valid cfa address");
m_frame_type = eNotAValidFrame;
return;
@@ -1847,9 +1855,11 @@ bool RegisterContextUnwind::TryFallbackUnwindPlan() {
active_row->GetCFAValue().GetValueType() !=
UnwindPlan::Row::FAValue::unspecified) {
addr_t new_cfa;
+ ProcessSP process_sp = m_thread.GetProcess();
+ ABISP abi_sp = process_sp ? process_sp->GetABI() : nullptr;
if (!ReadFrameAddress(m_fallback_unwind_plan_sp->GetRegisterKind(),
- active_row->GetCFAValue(), new_cfa) ||
- new_cfa == 0 || new_cfa == 1 || new_cfa == LLDB_INVALID_ADDRESS) {
+ active_row->GetCFAValue(), new_cfa) ||
+ !CallFrameAddressIsValid(abi_sp, new_cfa)) {
UnwindLogMsg("failed to get cfa with fallback unwindplan");
m_fallback_unwind_plan_sp.reset();
m_full_unwind_plan_sp = original_full_unwind_plan_sp;
@@ -1870,10 +1880,9 @@ bool RegisterContextUnwind::TryFallbackUnwindPlan() {
if (ReadRegisterValueFromRegisterLocation(regloc, reg_info,
reg_value)) {
new_caller_pc_value = reg_value.GetAsUInt64();
- if (ProcessSP process_sp = m_thread.GetProcess()) {
- if (ABISP abi_sp = process_sp->GetABI())
- new_caller_pc_value = abi_sp->FixCodeAddress(new_caller_pc_value);
- }
+ if (process_sp)
+ new_caller_pc_value =
+ process_sp->FixCodeAddress(new_caller_pc_value);
}
}
}
@@ -1932,9 +1941,11 @@ bool RegisterContextUnwind::ForceSwitchToFallbackUnwindPlan() {
active_row->GetCFAValue().GetValueType() !=
UnwindPlan::Row::FAValue::unspecified) {
addr_t new_cfa;
+ ProcessSP process_sp = m_thread.GetProcess();
+ ABISP abi_sp = process_sp ? process_sp->GetABI() : nullptr;
if (!ReadFrameAddress(m_fallback_unwind_plan_sp->GetRegisterKind(),
- active_row->GetCFAValue(), new_cfa) ||
- new_cfa == 0 || new_cfa == 1 || new_cfa == LLDB_INVALID_ADDRESS) {
+ active_row->GetCFAValue(), new_cfa) ||
+ !CallFrameAddressIsValid(abi_sp, new_cfa)) {
UnwindLogMsg("failed to get cfa with fallback unwindplan");
m_fallback_unwind_plan_sp.reset();
return false;
@@ -2055,8 +2066,7 @@ bool RegisterContextUnwind::ReadFrameAddress(
RegisterNumber cfa_reg(m_thread, row_register_kind,
fa.GetRegisterNumber());
if (ReadGPRValue(cfa_reg, cfa_reg_contents)) {
- if (cfa_reg_contents == LLDB_INVALID_ADDRESS || cfa_reg_contents == 0 ||
- cfa_reg_contents == 1) {
+ if (!CallFrameAddressIsValid(abi_sp, cfa_reg_contents)) {
UnwindLogMsg(
"Got an invalid CFA register value - reg %s (%d), value 0x%" PRIx64,
cfa_reg.GetName(), cfa_reg.GetAsKind(eRegisterKindLLDB),
diff --git a/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py b/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py
index 10cbd26328f4..fc7bfe43e205 100644
--- a/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py
+++ b/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py
@@ -12,7 +12,7 @@ from lldbsuite.test import lldbutil
class TestCortexMExceptionUnwind(TestBase):
NO_DEBUG_INFO_TESTCASE = True
- @skipIfRemote
+ @skipIfLLVMTargetMissing("ARM")
def test_no_fpu(self):
"""Test that we can backtrace correctly through an ARM Cortex-M Exception return stack"""
diff --git a/lldb/test/API/python_api/default-constructor/sb_filespec.py b/lldb/test/API/python_api/default-constructor/sb_filespec.py
index 4ab5c49c37eb..5dd78b1ace98 100644
--- a/lldb/test/API/python_api/default-constructor/sb_filespec.py
+++ b/lldb/test/API/python_api/default-constructor/sb_filespec.py
@@ -10,5 +10,5 @@ def fuzz_obj(obj):
obj.ResolveExecutableLocation()
obj.GetFilename()
obj.GetDirectory()
- obj.GetPath(None, 0)
+ obj.GetPath(1)
obj.GetDescription(lldb.SBStream())
diff --git a/llvm/include/llvm/ADT/StringExtras.h b/llvm/include/llvm/ADT/StringExtras.h
index 7d81c63485be..2440e7678a83 100644
--- a/llvm/include/llvm/ADT/StringExtras.h
+++ b/llvm/include/llvm/ADT/StringExtras.h
@@ -529,13 +529,15 @@ inline std::string join_items(Sep Separator, Args &&... Items) {
class ListSeparator {
bool First = true;
StringRef Separator;
+ StringRef Prefix;
public:
- ListSeparator(StringRef Separator = ", ") : Separator(Separator) {}
+ ListSeparator(StringRef Separator = ", ", StringRef Prefix = "")
+ : Separator(Separator), Prefix(Prefix) {}
operator StringRef() {
if (First) {
First = false;
- return {};
+ return Prefix;
}
return Separator;
}
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 8876e4ed6ae4..e5a6c8cc0a6a 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -2316,10 +2316,6 @@ private:
/// an add rec on said loop.
void getUsedLoops(const SCEV *S, SmallPtrSetImpl<const Loop *> &LoopsUsed);
- /// Try to match the pattern generated by getURemExpr(A, B). If successful,
- /// Assign A and B to LHS and RHS, respectively.
- LLVM_ABI bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS);
-
/// Look for a SCEV expression with type `SCEVType` and operands `Ops` in
/// `UniqueSCEVs`. Return if found, else nullptr.
SCEV *findExistingSCEVInCache(SCEVTypes SCEVType, ArrayRef<const SCEV *> Ops);
diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h b/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h
index 07a482d4f166..871028de3163 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h
@@ -252,6 +252,80 @@ m_scev_UDiv(const Op0_t &Op0, const Op1_t &Op1) {
return m_scev_Binary<SCEVUDivExpr>(Op0, Op1);
}
+/// Match unsigned remainder pattern.
+/// Matches patterns generated by getURemExpr.
+template <typename Op0_t, typename Op1_t> struct SCEVURem_match {
+ Op0_t Op0;
+ Op1_t Op1;
+ ScalarEvolution &SE;
+
+ SCEVURem_match(Op0_t Op0, Op1_t Op1, ScalarEvolution &SE)
+ : Op0(Op0), Op1(Op1), SE(SE) {}
+
+ bool match(const SCEV *Expr) const {
+ if (Expr->getType()->isPointerTy())
+ return false;
+
+ // Try to match 'zext (trunc A to iB) to iY', which is used
+ // for URem with constant power-of-2 second operands. Make sure the size of
+ // the operand A matches the size of the whole expressions.
+ const SCEV *LHS;
+ if (SCEVPatternMatch::match(Expr, m_scev_ZExt(m_scev_Trunc(m_SCEV(LHS))))) {
+ Type *TruncTy = cast<SCEVZeroExtendExpr>(Expr)->getOperand()->getType();
+ // Bail out if the type of the LHS is larger than the type of the
+ // expression for now.
+ if (SE.getTypeSizeInBits(LHS->getType()) >
+ SE.getTypeSizeInBits(Expr->getType()))
+ return false;
+ if (LHS->getType() != Expr->getType())
+ LHS = SE.getZeroExtendExpr(LHS, Expr->getType());
+ const SCEV *RHS =
+ SE.getConstant(APInt(SE.getTypeSizeInBits(Expr->getType()), 1)
+ << SE.getTypeSizeInBits(TruncTy));
+ return Op0.match(LHS) && Op1.match(RHS);
+ }
+ const auto *Add = dyn_cast<SCEVAddExpr>(Expr);
+ if (Add == nullptr || Add->getNumOperands() != 2)
+ return false;
+
+ const SCEV *A = Add->getOperand(1);
+ const auto *Mul = dyn_cast<SCEVMulExpr>(Add->getOperand(0));
+
+ if (Mul == nullptr)
+ return false;
+
+ const auto MatchURemWithDivisor = [&](const SCEV *B) {
+ // (SomeExpr + (-(SomeExpr / B) * B)).
+ if (Expr == SE.getURemExpr(A, B))
+ return Op0.match(A) && Op1.match(B);
+ return false;
+ };
+
+ // (SomeExpr + (-1 * (SomeExpr / B) * B)).
+ if (Mul->getNumOperands() == 3 && isa<SCEVConstant>(Mul->getOperand(0)))
+ return MatchURemWithDivisor(Mul->getOperand(1)) ||
+ MatchURemWithDivisor(Mul->getOperand(2));
+
+ // (SomeExpr + ((-SomeExpr / B) * B)) or (SomeExpr + ((SomeExpr / B) * -B)).
+ if (Mul->getNumOperands() == 2)
+ return MatchURemWithDivisor(Mul->getOperand(1)) ||
+ MatchURemWithDivisor(Mul->getOperand(0)) ||
+ MatchURemWithDivisor(SE.getNegativeSCEV(Mul->getOperand(1))) ||
+ MatchURemWithDivisor(SE.getNegativeSCEV(Mul->getOperand(0)));
+ return false;
+ }
+};
+
+/// Match the mathematical pattern A - (A / B) * B, where A and B can be
+/// arbitrary expressions. Also match zext (trunc A to iB) to iY, which is used
+/// for URem with constant power-of-2 second operands. It's not always easy, as
+/// A and B can be folded (imagine A is X / 2, and B is 4, A / B becomes X / 8).
+template <typename Op0_t, typename Op1_t>
+inline SCEVURem_match<Op0_t, Op1_t> m_scev_URem(Op0_t LHS, Op1_t RHS,
+ ScalarEvolution &SE) {
+ return SCEVURem_match<Op0_t, Op1_t>(LHS, RHS, SE);
+}
+
inline class_match<const Loop> m_Loop() { return class_match<const Loop>(); }
/// Match an affine SCEVAddRecExpr.
diff --git a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
index fa21eba1377d..f06e7ceaa74c 100644
--- a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
+++ b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
@@ -10,6 +10,24 @@
namespace llvm {
+namespace memprof {
+// Represents the eligibility status of a global variable for section prefix
+// annotation. Other than AnnotationOk, each enum value indicates a specific
+// reason for ineligibility.
+enum class AnnotationKind : uint8_t {
+ AnnotationOK,
+ DeclForLinker,
+ ExplicitSection,
+ ReservedName,
+};
+/// Returns the annotation kind of the global variable \p GV.
+AnnotationKind getAnnotationKind(const GlobalVariable &GV);
+
+/// Returns true if the annotation kind of the global variable \p GV is
+/// AnnotationOK.
+bool IsAnnotationOK(const GlobalVariable &GV);
+} // namespace memprof
+
/// A class that holds the constants that represent static data and their
/// profile information and provides methods to operate on them.
class StaticDataProfileInfo {
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 1694a33510d7..46b3d53a4b40 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -472,7 +472,7 @@ __OMP_RTL(__kmpc_target_init, false, Int32, KernelEnvironmentPtr, KernelLaunchEn
__OMP_RTL(__kmpc_target_deinit, false, Void,)
__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
__OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32,
- FuncPtrTy, VoidPtr, VoidPtrPtr, SizeTy)
+ FuncPtrTy, FuncPtrTy, VoidPtrPtr, SizeTy)
__OMP_RTL(__kmpc_for_static_loop_4, false, Void, IdentPtr, VoidPtr, VoidPtr, Int32, Int32, Int32, Int8)
__OMP_RTL(__kmpc_for_static_loop_4u, false, Void, IdentPtr, VoidPtr, VoidPtr, Int32, Int32, Int32, Int8)
__OMP_RTL(__kmpc_for_static_loop_8, false, Void, IdentPtr, VoidPtr, VoidPtr, Int64, Int64, Int64, Int8)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 00c3dbbf3e80..3fab6b0572cb 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1774,7 +1774,7 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
{
const SCEV *LHS;
const SCEV *RHS;
- if (matchURem(Op, LHS, RHS))
+ if (match(Op, m_scev_URem(m_SCEV(LHS), m_SCEV(RHS), *this)))
return getURemExpr(getZeroExtendExpr(LHS, Ty, Depth + 1),
getZeroExtendExpr(RHS, Ty, Depth + 1));
}
@@ -2699,17 +2699,12 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
}
// Canonicalize (-1 * urem X, Y) + X --> (Y * X/Y)
- if (Ops.size() == 2) {
- const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[0]);
- if (Mul && Mul->getNumOperands() == 2 &&
- Mul->getOperand(0)->isAllOnesValue()) {
- const SCEV *X;
- const SCEV *Y;
- if (matchURem(Mul->getOperand(1), X, Y) && X == Ops[1]) {
- return getMulExpr(Y, getUDivExpr(X, Y));
- }
- }
- }
+ const SCEV *Y;
+ if (Ops.size() == 2 &&
+ match(Ops[0],
+ m_scev_Mul(m_scev_AllOnes(),
+ m_scev_URem(m_scev_Specific(Ops[1]), m_SCEV(Y), *this))))
+ return getMulExpr(Y, getUDivExpr(Ops[1], Y));
// Skip past any other cast SCEVs.
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
@@ -15410,65 +15405,6 @@ void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const {
}
}
-// Match the mathematical pattern A - (A / B) * B, where A and B can be
-// arbitrary expressions. Also match zext (trunc A to iB) to iY, which is used
-// for URem with constant power-of-2 second operands.
-// It's not always easy, as A and B can be folded (imagine A is X / 2, and B is
-// 4, A / B becomes X / 8).
-bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS,
- const SCEV *&RHS) {
- if (Expr->getType()->isPointerTy())
- return false;
-
- // Try to match 'zext (trunc A to iB) to iY', which is used
- // for URem with constant power-of-2 second operands. Make sure the size of
- // the operand A matches the size of the whole expressions.
- if (match(Expr, m_scev_ZExt(m_scev_Trunc(m_SCEV(LHS))))) {
- Type *TruncTy = cast<SCEVZeroExtendExpr>(Expr)->getOperand()->getType();
- // Bail out if the type of the LHS is larger than the type of the
- // expression for now.
- if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(Expr->getType()))
- return false;
- if (LHS->getType() != Expr->getType())
- LHS = getZeroExtendExpr(LHS, Expr->getType());
- RHS = getConstant(APInt(getTypeSizeInBits(Expr->getType()), 1)
- << getTypeSizeInBits(TruncTy));
- return true;
- }
- const auto *Add = dyn_cast<SCEVAddExpr>(Expr);
- if (Add == nullptr || Add->getNumOperands() != 2)
- return false;
-
- const SCEV *A = Add->getOperand(1);
- const auto *Mul = dyn_cast<SCEVMulExpr>(Add->getOperand(0));
-
- if (Mul == nullptr)
- return false;
-
- const auto MatchURemWithDivisor = [&](const SCEV *B) {
- // (SomeExpr + (-(SomeExpr / B) * B)).
- if (Expr == getURemExpr(A, B)) {
- LHS = A;
- RHS = B;
- return true;
- }
- return false;
- };
-
- // (SomeExpr + (-1 * (SomeExpr / B) * B)).
- if (Mul->getNumOperands() == 3 && isa<SCEVConstant>(Mul->getOperand(0)))
- return MatchURemWithDivisor(Mul->getOperand(1)) ||
- MatchURemWithDivisor(Mul->getOperand(2));
-
- // (SomeExpr + ((-SomeExpr / B) * B)) or (SomeExpr + ((SomeExpr / B) * -B)).
- if (Mul->getNumOperands() == 2)
- return MatchURemWithDivisor(Mul->getOperand(1)) ||
- MatchURemWithDivisor(Mul->getOperand(0)) ||
- MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(1))) ||
- MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(0)));
- return false;
-}
-
ScalarEvolution::LoopGuards
ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
BasicBlock *Header = L->getHeader();
@@ -15689,20 +15625,18 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
if (Predicate == CmpInst::ICMP_EQ && match(RHS, m_scev_Zero())) {
// If LHS is A % B, i.e. A % B == 0, rewrite A to (A /u B) * B to
// explicitly express that.
- const SCEV *URemLHS = nullptr;
+ const SCEVUnknown *URemLHS = nullptr;
const SCEV *URemRHS = nullptr;
- if (SE.matchURem(LHS, URemLHS, URemRHS)) {
- if (const SCEVUnknown *LHSUnknown = dyn_cast<SCEVUnknown>(URemLHS)) {
- auto I = RewriteMap.find(LHSUnknown);
- const SCEV *RewrittenLHS =
- I != RewriteMap.end() ? I->second : LHSUnknown;
- RewrittenLHS = ApplyDivisibiltyOnMinMaxExpr(RewrittenLHS, URemRHS);
- const auto *Multiple =
- SE.getMulExpr(SE.getUDivExpr(RewrittenLHS, URemRHS), URemRHS);
- RewriteMap[LHSUnknown] = Multiple;
- ExprsToRewrite.push_back(LHSUnknown);
- return;
- }
+ if (match(LHS,
+ m_scev_URem(m_SCEVUnknown(URemLHS), m_SCEV(URemRHS), SE))) {
+ auto I = RewriteMap.find(URemLHS);
+ const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : URemLHS;
+ RewrittenLHS = ApplyDivisibiltyOnMinMaxExpr(RewrittenLHS, URemRHS);
+ const auto *Multiple =
+ SE.getMulExpr(SE.getUDivExpr(RewrittenLHS, URemRHS), URemRHS);
+ RewriteMap[URemLHS] = Multiple;
+ ExprsToRewrite.push_back(URemLHS);
+ return;
}
}
diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
index b036b2dde770..1f751ee5e09d 100644
--- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp
+++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
@@ -6,6 +6,46 @@
#include "llvm/ProfileData/InstrProf.h"
using namespace llvm;
+
+namespace llvm {
+namespace memprof {
+// Returns true iff the global variable has custom section either by
+// __attribute__((section("name")))
+// (https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate)
+// or #pragma clang section directives
+// (https://clang.llvm.org/docs/LanguageExtensions.html#specifying-section-names-for-global-objects-pragma-clang-section).
+static bool hasExplicitSectionName(const GlobalVariable &GVar) {
+ if (GVar.hasSection())
+ return true;
+
+ auto Attrs = GVar.getAttributes();
+ if (Attrs.hasAttribute("bss-section") || Attrs.hasAttribute("data-section") ||
+ Attrs.hasAttribute("relro-section") ||
+ Attrs.hasAttribute("rodata-section"))
+ return true;
+ return false;
+}
+
+AnnotationKind getAnnotationKind(const GlobalVariable &GV) {
+ if (GV.isDeclarationForLinker())
+ return AnnotationKind::DeclForLinker;
+ // Skip 'llvm.'-prefixed global variables conservatively because they are
+ // often handled specially,
+ StringRef Name = GV.getName();
+ if (Name.starts_with("llvm."))
+ return AnnotationKind::ReservedName;
+ // Respect user-specified custom data sections.
+ if (hasExplicitSectionName(GV))
+ return AnnotationKind::ExplicitSection;
+ return AnnotationKind::AnnotationOK;
+}
+
+bool IsAnnotationOK(const GlobalVariable &GV) {
+ return getAnnotationKind(GV) == AnnotationKind::AnnotationOK;
+}
+} // namespace memprof
+} // namespace llvm
+
void StaticDataProfileInfo::addConstantProfileCount(
const Constant *C, std::optional<uint64_t> Count) {
if (!Count) {
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index 96428cd7ea84..1d54d7233686 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -862,48 +862,46 @@ static void printMI(raw_ostream &OS, MFPrintState &State,
OS << TII->getName(MI.getOpcode());
- LS = ListSeparator();
+ // Print a space after the opcode if any additional tokens are printed.
+ LS = ListSeparator(", ", " ");
- if (I < E) {
- OS << ' ';
- for (; I < E; ++I) {
- OS << LS;
- printMIOperand(OS, State, MI, I, TRI, TII, ShouldPrintRegisterTies,
- PrintedTypes, MRI, /*PrintDef=*/true);
- }
+ for (; I < E; ++I) {
+ OS << LS;
+ printMIOperand(OS, State, MI, I, TRI, TII, ShouldPrintRegisterTies,
+ PrintedTypes, MRI, /*PrintDef=*/true);
}
// Print any optional symbols attached to this instruction as-if they were
// operands.
if (MCSymbol *PreInstrSymbol = MI.getPreInstrSymbol()) {
- OS << LS << " pre-instr-symbol ";
+ OS << LS << "pre-instr-symbol ";
MachineOperand::printSymbol(OS, *PreInstrSymbol);
}
if (MCSymbol *PostInstrSymbol = MI.getPostInstrSymbol()) {
- OS << LS << " post-instr-symbol ";
+ OS << LS << "post-instr-symbol ";
MachineOperand::printSymbol(OS, *PostInstrSymbol);
}
if (MDNode *HeapAllocMarker = MI.getHeapAllocMarker()) {
- OS << LS << " heap-alloc-marker ";
+ OS << LS << "heap-alloc-marker ";
HeapAllocMarker->printAsOperand(OS, State.MST);
}
if (MDNode *PCSections = MI.getPCSections()) {
- OS << LS << " pcsections ";
+ OS << LS << "pcsections ";
PCSections->printAsOperand(OS, State.MST);
}
if (MDNode *MMRA = MI.getMMRAMetadata()) {
- OS << LS << " mmra ";
+ OS << LS << "mmra ";
MMRA->printAsOperand(OS, State.MST);
}
if (uint32_t CFIType = MI.getCFIType())
- OS << LS << " cfi-type " << CFIType;
+ OS << LS << "cfi-type " << CFIType;
if (auto Num = MI.peekDebugInstrNum())
- OS << LS << " debug-instr-number " << Num;
+ OS << LS << "debug-instr-number " << Num;
if (PrintLocations) {
if (const DebugLoc &DL = MI.getDebugLoc()) {
- OS << LS << " debug-location ";
+ OS << LS << "debug-location ";
DL->printAsOperand(OS, State.MST);
}
}
diff --git a/llvm/lib/CodeGen/StaticDataAnnotator.cpp b/llvm/lib/CodeGen/StaticDataAnnotator.cpp
index 53a9ab4dbda0..eac201201708 100644
--- a/llvm/lib/CodeGen/StaticDataAnnotator.cpp
+++ b/llvm/lib/CodeGen/StaticDataAnnotator.cpp
@@ -75,22 +75,11 @@ bool StaticDataAnnotator::runOnModule(Module &M) {
bool Changed = false;
for (auto &GV : M.globals()) {
- if (GV.isDeclarationForLinker())
+ if (!llvm::memprof::IsAnnotationOK(GV))
continue;
- // The implementation below assumes prior passes don't set section prefixes,
- // and specifically do 'assign' rather than 'update'. So report error if a
- // section prefix is already set.
- if (auto maybeSectionPrefix = GV.getSectionPrefix();
- maybeSectionPrefix && !maybeSectionPrefix->empty())
- llvm::report_fatal_error("Global variable " + GV.getName() +
- " already has a section prefix " +
- *maybeSectionPrefix);
-
StringRef SectionPrefix = SDPI->getConstantSectionPrefix(&GV, PSI);
- if (SectionPrefix.empty())
- continue;
-
+ // setSectionPrefix returns true if the section prefix is updated.
Changed |= GV.setSectionPrefix(SectionPrefix);
}
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index e22dc2507d54..1593a401bcb2 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -130,10 +130,8 @@ StaticDataSplitter::getConstant(const MachineOperand &Op,
if (Op.isGlobal()) {
// Find global variables with local linkage.
const GlobalVariable *GV = getLocalLinkageGlobalVariable(Op.getGlobal());
- // Skip 'llvm.'-prefixed global variables conservatively because they are
- // often handled specially, and skip those not in static data
- // sections.
- if (!GV || GV->getName().starts_with("llvm.") ||
+ // Skip those not eligible for annotation or not in static data sections.
+ if (!GV || !llvm::memprof::IsAnnotationOK(*GV) ||
!inStaticDataSection(*GV, TM))
return nullptr;
return GV;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index dbe74b1b08f8..5700468e2420 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -2394,15 +2394,19 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const {
else if (((SGMask & SchedGroupMask::ALU) != SchedGroupMask::NONE) &&
(TII->isVALU(MI) || TII->isMFMAorWMMA(MI) || TII->isSALU(MI) ||
TII->isTRANS(MI)))
- Result = true;
+ Result = !MI.mayLoadOrStore();
else if (((SGMask & SchedGroupMask::VALU) != SchedGroupMask::NONE) &&
- TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI))
- Result = true;
+ TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI)) {
+ // Some memory instructions may be marked as VALU (e.g. BUFFER_LOAD_*_LDS).
+ // For our purposes, these shall not be classified as VALU as this results
+ // in unexpected behavior.
+ Result = !MI.mayLoadOrStore();
+ }
else if (((SGMask & SchedGroupMask::SALU) != SchedGroupMask::NONE) &&
TII->isSALU(MI))
- Result = true;
+ Result = !MI.mayLoadOrStore();
else if (((SGMask & SchedGroupMask::MFMA) != SchedGroupMask::NONE) &&
TII->isMFMAorWMMA(MI))
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 278cf6dc1d78..1b7cb9bd2169 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -168,13 +168,13 @@ struct DemandedFields {
// If this is true, we demand that VTYPE is set to some legal state, i.e. that
// vill is unset.
bool VILL = false;
- bool UseTWiden = false;
- bool UseAltFmt = false;
+ bool TWiden = false;
+ bool AltFmt = false;
// Return true if any part of VTYPE was used
bool usedVTYPE() const {
return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL ||
- UseTWiden || UseAltFmt;
+ TWiden || AltFmt;
}
// Return true if any property of VL was used
@@ -190,8 +190,8 @@ struct DemandedFields {
TailPolicy = true;
MaskPolicy = true;
VILL = true;
- UseTWiden = true;
- UseAltFmt = true;
+ TWiden = true;
+ AltFmt = true;
}
// Mark all VL properties as demanded
@@ -217,8 +217,8 @@ struct DemandedFields {
TailPolicy |= B.TailPolicy;
MaskPolicy |= B.MaskPolicy;
VILL |= B.VILL;
- UseAltFmt |= B.UseAltFmt;
- UseTWiden |= B.UseTWiden;
+ AltFmt |= B.AltFmt;
+ TWiden |= B.TWiden;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -266,8 +266,8 @@ struct DemandedFields {
OS << "TailPolicy=" << TailPolicy << ", ";
OS << "MaskPolicy=" << MaskPolicy << ", ";
OS << "VILL=" << VILL << ", ";
- OS << "UseAltFmt=" << UseAltFmt << ", ";
- OS << "UseTWiden=" << UseTWiden;
+ OS << "AltFmt=" << AltFmt << ", ";
+ OS << "TWiden=" << TWiden;
OS << "}";
}
#endif
@@ -337,13 +337,13 @@ static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
RISCVVType::isMaskAgnostic(NewVType))
return false;
- if (Used.UseTWiden && (RISCVVType::hasXSfmmWiden(CurVType) !=
- RISCVVType::hasXSfmmWiden(NewVType) ||
- (RISCVVType::hasXSfmmWiden(CurVType) &&
- RISCVVType::getXSfmmWiden(CurVType) !=
- RISCVVType::getXSfmmWiden(NewVType))))
+ if (Used.TWiden && (RISCVVType::hasXSfmmWiden(CurVType) !=
+ RISCVVType::hasXSfmmWiden(NewVType) ||
+ (RISCVVType::hasXSfmmWiden(CurVType) &&
+ RISCVVType::getXSfmmWiden(CurVType) !=
+ RISCVVType::getXSfmmWiden(NewVType))))
return false;
- if (Used.UseAltFmt &&
+ if (Used.AltFmt &&
RISCVVType::isAltFmt(CurVType) != RISCVVType::isAltFmt(NewVType))
return false;
return true;
@@ -497,10 +497,10 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
Res.TailPolicy = false;
}
- Res.UseAltFmt = RISCVII::getAltFmtType(MI.getDesc().TSFlags) !=
- RISCVII::AltFmtType::DontCare;
- Res.UseTWiden = RISCVII::hasTWidenOp(MI.getDesc().TSFlags) ||
- RISCVInstrInfo::isXSfmmVectorConfigInstr(MI);
+ Res.AltFmt = RISCVII::getAltFmtType(MI.getDesc().TSFlags) !=
+ RISCVII::AltFmtType::DontCare;
+ Res.TWiden = RISCVII::hasTWidenOp(MI.getDesc().TSFlags) ||
+ RISCVInstrInfo::isXSfmmVectorConfigInstr(MI);
return Res;
}
@@ -1347,8 +1347,8 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
IncomingInfo.getTailAgnostic(),
(Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
IncomingInfo.getMaskAgnostic(),
- (Demanded.UseAltFmt ? IncomingInfo : Info).getAltFmt(),
- Demanded.UseTWiden ? IncomingInfo.getTWiden() : 0);
+ (Demanded.AltFmt ? IncomingInfo : Info).getAltFmt(),
+ Demanded.TWiden ? IncomingInfo.getTWiden() : 0);
// If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
// the AVL.
@@ -1945,9 +1945,12 @@ bool RISCVInsertVSETVLI::insertVSETMTK(MachineBasicBlock &MBB,
.addImm(Log2_32(CurrInfo.getTWiden()) + 1);
Changed = true;
+ Register Reg = Op.getReg();
+ Op.setReg(Register());
+ Op.setIsKill(false);
if (LIS) {
LIS->InsertMachineInstrInMaps(*TmpMI);
- LiveInterval &LI = LIS->getInterval(Op.getReg());
+ LiveInterval &LI = LIS->getInterval(Reg);
// Erase the AVL operand from the instruction.
LIS->shrinkToUses(&LI);
@@ -1955,9 +1958,6 @@ bool RISCVInsertVSETVLI::insertVSETMTK(MachineBasicBlock &MBB,
// SmallVector<LiveInterval *> SplitLIs;
// LIS->splitSeparateComponents(LI, SplitLIs);
}
-
- Op.setReg(RISCV::NoRegister);
- Op.setIsKill(false);
}
return Changed;
}
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
index c86092bd51ed..a6ec6c120776 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/StaticDataProfileInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
@@ -194,6 +195,30 @@ static bool isAllocationWithHotColdVariant(const Function *Callee,
}
}
+static void HandleUnsupportedAnnotationKinds(GlobalVariable &GVar,
+ AnnotationKind Kind) {
+ assert(Kind != llvm::memprof::AnnotationKind::AnnotationOK &&
+ "Should not handle AnnotationOK here");
+ SmallString<32> Reason;
+ switch (Kind) {
+ case llvm::memprof::AnnotationKind::ExplicitSection:
+ ++NumOfMemProfExplicitSectionGlobalVars;
+ Reason.append("explicit section name");
+ break;
+ case llvm::memprof::AnnotationKind::DeclForLinker:
+ Reason.append("linker declaration");
+ break;
+ case llvm::memprof::AnnotationKind::ReservedName:
+ Reason.append("name starts with `llvm.`");
+ break;
+ default:
+ llvm_unreachable("Unexpected annotation kind");
+ }
+ LLVM_DEBUG(dbgs() << "Skip annotation for " << GVar.getName() << " due to "
+ << Reason << ".\n");
+ return;
+}
+
struct AllocMatchInfo {
uint64_t TotalSize = 0;
AllocationType AllocType = AllocationType::None;
@@ -775,29 +800,13 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
return PreservedAnalyses::none();
}
-// Returns true iff the global variable has custom section either by
-// __attribute__((section("name")))
-// (https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate)
-// or #pragma clang section directives
-// (https://clang.llvm.org/docs/LanguageExtensions.html#specifying-section-names-for-global-objects-pragma-clang-section).
-static bool hasExplicitSectionName(const GlobalVariable &GVar) {
- if (GVar.hasSection())
- return true;
-
- auto Attrs = GVar.getAttributes();
- if (Attrs.hasAttribute("bss-section") || Attrs.hasAttribute("data-section") ||
- Attrs.hasAttribute("relro-section") ||
- Attrs.hasAttribute("rodata-section"))
- return true;
- return false;
-}
-
bool MemProfUsePass::annotateGlobalVariables(
Module &M, const memprof::DataAccessProfData *DataAccessProf) {
if (!AnnotateStaticDataSectionPrefix || M.globals().empty())
return false;
if (!DataAccessProf) {
+ M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 0U);
M.getContext().diagnose(DiagnosticInfoPGOProfile(
MemoryProfileFileName.data(),
StringRef("Data access profiles not found in memprof. Ignore "
@@ -805,6 +814,7 @@ bool MemProfUsePass::annotateGlobalVariables(
DS_Warning));
return false;
}
+ M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 1U);
bool Changed = false;
// Iterate all global variables in the module and annotate them based on
@@ -815,13 +825,9 @@ bool MemProfUsePass::annotateGlobalVariables(
for (GlobalVariable &GVar : M.globals()) {
assert(!GVar.getSectionPrefix().has_value() &&
"GVar shouldn't have section prefix yet");
- if (GVar.isDeclarationForLinker())
- continue;
-
- if (hasExplicitSectionName(GVar)) {
- ++NumOfMemProfExplicitSectionGlobalVars;
- LLVM_DEBUG(dbgs() << "Global variable " << GVar.getName()
- << " has explicit section name. Skip annotating.\n");
+ auto Kind = llvm::memprof::getAnnotationKind(GVar);
+ if (Kind != llvm::memprof::AnnotationKind::AnnotationOK) {
+ HandleUnsupportedAnnotationKinds(GVar, Kind);
continue;
}
@@ -831,7 +837,6 @@ bool MemProfUsePass::annotateGlobalVariables(
// TODO: Track string content hash in the profiles and compute it inside the
// compiler to categeorize the hotness string literals.
if (Name.starts_with(".str")) {
-
LLVM_DEBUG(dbgs() << "Skip annotating string literal " << Name << "\n");
continue;
}
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 45cee1e7da62..9035e58a707c 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -526,7 +526,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
// Recognize the canonical representation of an unsimplifed urem.
const SCEV *URemLHS = nullptr;
const SCEV *URemRHS = nullptr;
- if (SE.matchURem(S, URemLHS, URemRHS)) {
+ if (match(S, m_scev_URem(m_SCEV(URemLHS), m_SCEV(URemRHS), SE))) {
Value *LHS = expand(URemLHS);
Value *RHS = expand(URemRHS);
return InsertBinop(Instruction::URem, LHS, RHS, SCEV::FlagAnyWrap,
diff --git a/llvm/test/CodeGen/AMDGPU/sched.group.classification.mir b/llvm/test/CodeGen/AMDGPU/sched.group.classification.mir
new file mode 100644
index 000000000000..a4aad574aaaf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sched.group.classification.mir
@@ -0,0 +1,59 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=machine-scheduler -o - %s | FileCheck %s
+
+---
+name: buffer_load_lds_not_valu
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; CHECK-LABEL: name: buffer_load_lds_not_valu
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[V_ADD_U32_e32_]], implicit $exec
+ ; CHECK-NEXT: $m0 = S_MOV_B32 0
+ ; CHECK-NEXT: BUFFER_LOAD_DWORDX4_LDS_OFFEN [[DEF]], [[DEF1]], 0, 0, 0, 0, implicit $exec, implicit $m0
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_1]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]], implicit $exec
+ ; CHECK-NEXT: $m0 = S_MOV_B32 1
+ ; CHECK-NEXT: BUFFER_LOAD_DWORDX4_LDS_OFFEN [[DEF]], [[DEF1]], 0, 0, 0, 0, implicit $exec, implicit $m0
+ ; CHECK-NEXT: [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_3]], [[V_ADD_U32_e32_4]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_4]], [[V_ADD_U32_e32_5]], implicit $exec
+ ; CHECK-NEXT: dead [[V_ADD_U32_e32_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_5]], [[V_ADD_U32_e32_6]], implicit $exec
+ ; CHECK-NEXT: SCHED_GROUP_BARRIER 2, 2, 0
+ ; CHECK-NEXT: SCHED_GROUP_BARRIER 4, 1, 0
+ ; CHECK-NEXT: SCHED_GROUP_BARRIER 2, 2, 0
+ ; CHECK-NEXT: SCHED_GROUP_BARRIER 4, 1, 0
+ ; CHECK-NEXT: SCHED_GROUP_BARRIER 2, 4, 0
+ ; CHECK-NEXT: S_ENDPGM 0
+ $exec = IMPLICIT_DEF
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sgpr_128 = IMPLICIT_DEF
+ %2:vgpr_32 = IMPLICIT_DEF
+ %3:vgpr_32 = IMPLICIT_DEF
+ %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
+ %5:vgpr_32 = V_ADD_U32_e32 %3, %4, implicit $exec
+ $m0 = S_MOV_B32 0
+ BUFFER_LOAD_DWORDX4_LDS_OFFEN %0, %1, 0, 0, 0, 0, implicit $exec, implicit $m0
+ $m0 = S_MOV_B32 1
+ BUFFER_LOAD_DWORDX4_LDS_OFFEN %0, %1, 0, 0, 0, 0, implicit $exec, implicit $m0
+ %6:vgpr_32 = V_ADD_U32_e32 %4, %5, implicit $exec
+ %7:vgpr_32 = V_ADD_U32_e32 %5, %6, implicit $exec
+ %8:vgpr_32 = V_ADD_U32_e32 %6, %7, implicit $exec
+ %9:vgpr_32 = V_ADD_U32_e32 %7, %8, implicit $exec
+ %10:vgpr_32 = V_ADD_U32_e32 %8, %9, implicit $exec
+ %11:vgpr_32 = V_ADD_U32_e32 %9, %10, implicit $exec
+ SCHED_GROUP_BARRIER 2, 2, 0
+ SCHED_GROUP_BARRIER 4, 1 ,0
+ SCHED_GROUP_BARRIER 2, 2, 0
+ SCHED_GROUP_BARRIER 4, 1 ,0
+ SCHED_GROUP_BARRIER 2, 4, 0
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir b/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir
index 1030917c8741..302f70fc1519 100644
--- a/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir
+++ b/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=aarch64 -run-pass=prologepilog -run-pass=aarch64-ptrauth -o - %s 2>&1 | FileCheck %s
+# RUN: llc -mtriple=aarch64 -run-pass=prologepilog -run-pass=aarch64-ptrauth -o - %s 2>&1 | FileCheck --strict-whitespace %s
--- |
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"
diff --git a/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll
index a0c243be0e7e..f3950b75a969 100644
--- a/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll
+++ b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll
@@ -1,16 +1,15 @@
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-;; A minimal test case. llc will crash if global variables already has a section
-;; prefix. Subsequent PRs will expand on this test case to test the hotness
-;; reconciliation implementation.
-
-; RUN: not llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \
+;; A minimal test case. Subsequent PRs will expand on this test case
+;; (e.g., with more functions, variables and profiles) and test the hotness
+;; reconcillation implementation.
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \
; RUN: -partition-static-data-sections=true \
; RUN: -data-sections=true -unique-section-names=false \
-; RUN: %s -o - 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: %s -o - 2>&1 | FileCheck %s --check-prefix=IR
-; ERR: Global variable hot_bss already has a section prefix hot
+; IR: .section .bss.hot.,"aw"
@hot_bss = internal global i32 0, !section_prefix !17
diff --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll
index ce06d1712f84..604b4fd5a96e 100644
--- a/llvm/test/CodeGen/X86/global-variable-partition.ll
+++ b/llvm/test/CodeGen/X86/global-variable-partition.ll
@@ -106,23 +106,31 @@ target triple = "x86_64-unknown-linux-gnu"
; UNIQ-NEXT: .section .data.unlikely.,"aw",@progbits,unique,8
; AGG-NEXT: .section .data.unlikely.,"aw",@progbits
+;; The `.section` directive is omitted for .data with -unique-section-names=false.
+; See MCSectionELF::shouldOmitSectionDirective for the implementation details.
+
; For @data_with_unknown_hotness
; SYM: .type .Ldata_with_unknown_hotness,@object # @data_with_unknown_hotness
; SYM: .section .data..Ldata_with_unknown_hotness,"aw",@progbits
; UNIQ: .section .data,"aw",@progbits,unique,9
-; The `.section` directive is omitted for .data with -unique-section-names=false.
-; See MCSectionELF::shouldOmitSectionDirective for the implementation details.
+
; AGG: .data
; COMMON: .Ldata_with_unknown_hotness:
-; For @hot_data_custom_bar_section
-; It has an explicit section attribute 'var' and shouldn't have hot or unlikely suffix.
+; For variables that are not eligible for section prefix annotation
; COMMON: .type hot_data_custom_bar_section,@object
; SYM-NEXT: .section bar,"aw",@progbits
; SYM: hot_data_custom_bar_section
; UNIQ: .section bar,"aw",@progbits
; AGG: .section bar,"aw",@progbits
+; SYM: .section .data.llvm.fake_var,"aw"
+; UNIQ: .section .data,"aw"
+; AGG: .data
+
+;; No section for linker declaration
+; COMMON-NOT: qux
+
@.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1
@.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1
@hot_relro_array = internal constant [2 x ptr] [ptr @bss2, ptr @data3]
@@ -137,6 +145,8 @@ target triple = "x86_64-unknown-linux-gnu"
@data3 = internal global i32 3
@data_with_unknown_hotness = private global i32 5
@hot_data_custom_bar_section = internal global i32 101 #0
+@llvm.fake_var = internal global i32 123
+@qux = external global i64
define void @cold_func(i32 %0) !prof !15 {
%2 = load i32, ptr @cold_bss
diff --git a/llvm/test/Transforms/PGOProfile/data-access-profile.ll b/llvm/test/Transforms/PGOProfile/data-access-profile.ll
index 29198f34ccbb..205184bdd715 100644
--- a/llvm/test/Transforms/PGOProfile/data-access-profile.ll
+++ b/llvm/test/Transforms/PGOProfile/data-access-profile.ll
@@ -3,55 +3,72 @@
; RUN: rm -rf %t && split-file %s %t && cd %t
-;; Read a text profile and merge it into indexed profile.
+;; Read text profiles and merge them into indexed profiles.
; RUN: llvm-profdata merge --memprof-version=4 memprof.yaml -o memprof.profdata
+; RUN: llvm-profdata merge --memprof-version=4 memprof-no-dap.yaml -o memprof-no-dap.profdata
;; Run optimizer pass on an IR module without IR functions, and test that global
;; variables in the module could be annotated (i.e., no early return),
; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' -memprof-annotate-static-data-prefix \
-; RUN: -debug-only=memprof -stats -S funcless-module.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,PREFIX,STAT
+; RUN: -debug-only=memprof -stats -S funcless-module.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,IR,STAT
;; Run optimizer pass on the IR, and check the section prefix.
; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' -memprof-annotate-static-data-prefix \
-; RUN: -debug-only=memprof -stats -S input.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,PREFIX,STAT
+; RUN: -debug-only=memprof -stats -S input.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,IR,STAT
-;; Run optimizer pass without explicitly setting -memprof-annotate-static-data-prefix.
-;; The output text IR shouldn't have `section_prefix`
+;; Run memprof without providing memprof data. Test that IR has module flag
+;; `EnableDataAccessProf` as 0.
+; RUN: opt -passes='memprof-use<profile-filename=memprof-no-dap.profdata>' -memprof-annotate-static-data-prefix \
+; RUN: -debug-only=memprof -stats -S input.ll -o - 2>&1 | FileCheck %s --check-prefix=FLAG
+
+;; Run memprof without explicitly setting -memprof-annotate-static-data-prefix.
+;; The output text IR shouldn't have `section_prefix` or EnableDataAccessProf module flag.
; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' \
-; RUN: -debug-only=memprof -stats -S input.ll -o - | FileCheck %s --implicit-check-not="section_prefix"
+; RUN: -debug-only=memprof -stats -S input.ll -o - | FileCheck %s --check-prefix=FLAGLESS --implicit-check-not="section_prefix"
; LOG: Skip annotating string literal .str
; LOG: Global variable var1 is annotated as hot
; LOG: Global variable var2.llvm.125 is annotated as hot
; LOG: Global variable bar is not annotated
; LOG: Global variable foo is annotated as unlikely
-; LOG: Global variable var3 has explicit section name. Skip annotating.
-; LOG: Global variable var4 has explicit section name. Skip annotating.
+; LOG: Skip annotation for var3 due to explicit section name.
+; LOG: Skip annotation for var4 due to explicit section name.
+; LOG: Skip annotation for llvm.fake_var due to name starts with `llvm.`.
+; LOG: Skip annotation for qux due to linker declaration.
;; String literals are not annotated.
-; PREFIX: @.str = unnamed_addr constant [5 x i8] c"abcde"
-; PREFIX-NOT: section_prefix
-; PREFIX: @var1 = global i32 123, !section_prefix !0
+; IR: @.str = unnamed_addr constant [5 x i8] c"abcde"
+; IR-NOT: section_prefix
+; IR: @var1 = global i32 123, !section_prefix !0
;; @var.llvm.125 will be canonicalized to @var2 for profile look-up.
-; PREFIX-NEXT: @var2.llvm.125 = global i64 0, !section_prefix !0
+; IR-NEXT: @var2.llvm.125 = global i64 0, !section_prefix !0
;; @bar is not seen in hot symbol or known symbol set, so it won't get a section
;; prefix. Test this by testing that there is no section_prefix between @bar and
;; @foo.
-; PREFIX-NEXT: @bar = global i16 3
-; PREFIX-NOT: !section_prefix
+; IR-NEXT: @bar = global i16 3
+; IR-NOT: !section_prefix
;; @foo is unlikely.
-; PREFIX-NEXT: @foo = global i8 2, !section_prefix !1
+; IR-NEXT: @foo = global i8 2, !section_prefix !1
+
+; IR-NEXT: @var3 = constant [2 x i32] [i32 12345, i32 6789], section "sec1"
+; IR-NEXT: @var4 = constant [1 x i64] [i64 98765] #0
+
+; IR: @llvm.fake_var = global i32 123
+; IR-NOT: !section_prefix
+; IR: @qux = external global i64
+; IR-NOT: !section_prefix
-; PREFIX-NEXT: @var3 = constant [2 x i32] [i32 12345, i32 6789], section "sec1"
-; PREFIX-NEXT: @var4 = constant [1 x i64] [i64 98765] #0
+; IR: attributes #0 = { "rodata-section"="sec2" }
-; PREFIX: attributes #0 = { "rodata-section"="sec2" }
+; IR: !0 = !{!"section_prefix", !"hot"}
+; IR-NEXT: !1 = !{!"section_prefix", !"unlikely"}
+; IR-NEXT: !2 = !{i32 2, !"EnableDataAccessProf", i32 1}
-; PREFIX: !0 = !{!"section_prefix", !"hot"}
-; PREFIX-NEXT: !1 = !{!"section_prefix", !"unlikely"}
+; FLAG: !{i32 2, !"EnableDataAccessProf", i32 0}
+; FLAGLESS-NOT: EnableDataAccessProf
; STAT: 1 memprof - Number of global vars annotated with 'unlikely' section prefix.
; STAT: 2 memprof - Number of global vars with user-specified section (not annotated).
@@ -72,6 +89,24 @@ DataAccessProfiles:
- foo
KnownColdStrHashes: [ 999, 1001 ]
...
+;--- memprof-no-dap.yaml
+---
+# A memprof file with without data access profiles. The heap records are simplified
+# to pass profile parsing and don't need to match the IR.
+HeapProfileRecords:
+ - GUID: 0xdeadbeef12345678
+ AllocSites:
+ - Callstack:
+ - { Function: 0x1111111111111111, LineOffset: 11, Column: 10, IsInlineFrame: true }
+ MemInfoBlock:
+ AllocCount: 111
+ TotalSize: 222
+ TotalLifetime: 333
+ TotalLifetimeAccessDensity: 444
+ CallSites:
+ - Frames:
+ - { Function: 0x5555555555555555, LineOffset: 55, Column: 50, IsInlineFrame: true }
+...
;--- input.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
@@ -84,11 +119,14 @@ target triple = "x86_64-unknown-linux-gnu"
@foo = global i8 2
@var3 = constant [2 x i32][i32 12345, i32 6789], section "sec1"
@var4 = constant [1 x i64][i64 98765] #0
+@llvm.fake_var = global i32 123
+@qux = external global i64
define i32 @func() {
%a = load i32, ptr @var1
%b = load i32, ptr @var2.llvm.125
- %ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b)
+ %c = load i32, ptr @llvm.fake_var
+ %ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b, i32 %c)
ret i32 %ret
}
@@ -108,5 +146,8 @@ target triple = "x86_64-unknown-linux-gnu"
@foo = global i8 2
@var3 = constant [2 x i32][i32 12345, i32 6789], section "sec1"
@var4 = constant [1 x i64][i64 98765] #0
+@llvm.fake_var = global i32 123
+@qux = external global i64
+
attributes #0 = { "rodata-section"="sec2" }
diff --git a/llvm/unittests/ADT/StringExtrasTest.cpp b/llvm/unittests/ADT/StringExtrasTest.cpp
index fbaed38da594..af88f88949b7 100644
--- a/llvm/unittests/ADT/StringExtrasTest.cpp
+++ b/llvm/unittests/ADT/StringExtrasTest.cpp
@@ -290,6 +290,12 @@ TEST(StringExtrasTest, ListSeparator) {
EXPECT_EQ(S, "");
S = LS2;
EXPECT_EQ(S, " ");
+
+ ListSeparator LS3(",", "{");
+ S = LS3;
+ EXPECT_EQ(S, "{");
+ S = LS3;
+ EXPECT_EQ(S, ",");
}
TEST(StringExtrasTest, toStringAPInt) {
diff --git a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp
index 1a68823b4f25..5d7eded06a76 100644
--- a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp
+++ b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp
@@ -11,6 +11,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
+#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/Constants.h"
@@ -26,6 +27,8 @@
namespace llvm {
+using namespace SCEVPatternMatch;
+
// We use this fixture to ensure that we clean up ScalarEvolution before
// deleting the PassManager.
class ScalarEvolutionsTest : public testing::Test {
@@ -64,11 +67,6 @@ static std::optional<APInt> computeConstantDifference(ScalarEvolution &SE,
return SE.computeConstantDifference(LHS, RHS);
}
- static bool matchURem(ScalarEvolution &SE, const SCEV *Expr, const SCEV *&LHS,
- const SCEV *&RHS) {
- return SE.matchURem(Expr, LHS, RHS);
- }
-
static bool isImpliedCond(
ScalarEvolution &SE, ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS, ICmpInst::Predicate FoundPred, const SCEV *FoundLHS,
@@ -1524,7 +1522,7 @@ TEST_F(ScalarEvolutionsTest, MatchURem) {
auto *URemI = getInstructionByName(F, N);
auto *S = SE.getSCEV(URemI);
const SCEV *LHS, *RHS;
- EXPECT_TRUE(matchURem(SE, S, LHS, RHS));
+ EXPECT_TRUE(match(S, m_scev_URem(m_SCEV(LHS), m_SCEV(RHS), SE)));
EXPECT_EQ(LHS, SE.getSCEV(URemI->getOperand(0)));
EXPECT_EQ(RHS, SE.getSCEV(URemI->getOperand(1)));
EXPECT_EQ(LHS->getType(), S->getType());
@@ -1537,7 +1535,7 @@ TEST_F(ScalarEvolutionsTest, MatchURem) {
auto *URem1 = getInstructionByName(F, "rem4");
auto *S = SE.getSCEV(Ext);
const SCEV *LHS, *RHS;
- EXPECT_TRUE(matchURem(SE, S, LHS, RHS));
+ EXPECT_TRUE(match(S, m_scev_URem(m_SCEV(LHS), m_SCEV(RHS), SE)));
EXPECT_NE(LHS, SE.getSCEV(URem1->getOperand(0)));
// RHS and URem1->getOperand(1) have different widths, so compare the
// integer values.
diff --git a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt
index 8d9474bf3789..c301e0b40e8f 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt
@@ -48,6 +48,10 @@ mlir_tablegen(LLVMIntrinsicFromLLVMIRConversions.inc -gen-intr-from-llvmir-conve
mlir_tablegen(LLVMConvertibleLLVMIRIntrinsics.inc -gen-convertible-llvmir-intrinsics)
add_mlir_dialect_tablegen_target(MLIRLLVMIntrinsicConversionsIncGen)
+set(LLVM_TARGET_DEFINITIONS LLVMDialectBytecode.td)
+mlir_tablegen(LLVMDialectBytecode.cpp.inc -gen-bytecode -bytecode-dialect="LLVM")
+add_public_tablegen_target(MLIRLLVMDialectBytecodeIncGen)
+
set(LLVM_TARGET_DEFINITIONS BasicPtxBuilderInterface.td)
mlir_tablegen(BasicPtxBuilderInterface.h.inc -gen-op-interface-decls)
mlir_tablegen(BasicPtxBuilderInterface.cpp.inc -gen-op-interface-defs)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialectBytecode.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialectBytecode.td
new file mode 100644
index 000000000000..e7b202cd4f63
--- /dev/null
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialectBytecode.td
@@ -0,0 +1,353 @@
+//===-- LLVMDialectBytecode.td - LLVM bytecode defs --------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the LLVM bytecode reader/writer definition file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DIALECT_BYTECODE
+#define LLVM_DIALECT_BYTECODE
+
+include "mlir/IR/BytecodeBase.td"
+
+//===----------------------------------------------------------------------===//
+// Bytecode classes for attributes and types.
+//===----------------------------------------------------------------------===//
+
+def String :
+ WithParser <"succeeded($_reader.readString($_var))",
+ WithBuilder<"$_args",
+ WithPrinter<"$_writer.writeOwnedString($_getter)",
+ WithType <"StringRef">>>>;
+
+class Attr<string type> : WithType<type, Attribute>;
+
+class OptionalAttribute<string type> :
+ WithParser <"succeeded($_reader.readOptionalAttribute($_var))",
+ WithPrinter<"$_writer.writeOptionalAttribute($_getter)",
+ WithType<type, Attribute>>>;
+
+class OptionalInt<string type> :
+ WithParser <"succeeded(readOptionalInt($_reader, $_var))",
+ WithPrinter<"writeOptionalInt($_writer, $_getter)",
+ WithType<"std::optional<" # type # ">", VarInt>>>;
+
+class OptionalArrayRef<string eltType> :
+ WithParser <"succeeded(readOptionalArrayRef<"
+ # eltType # ">($_reader, $_var))",
+ WithPrinter<"writeOptionalArrayRef<"
+ # eltType # ">($_writer, $_getter)",
+ WithType<"SmallVector<"
+ # eltType # ">", Attribute>>>;
+
+class EnumClassFlag<string flag, string getter> :
+ WithParser<"succeeded($_reader.readVarInt($_var))",
+ WithBuilder<"(" # flag # ")$_args",
+ WithPrinter<"$_writer.writeVarInt((uint64_t)$_name." # getter # ")",
+ WithType<"uint64_t", VarInt>>>>;
+
+//===----------------------------------------------------------------------===//
+// General notes
+// - For each attribute or type entry, the argument names should match
+// LLVMAttrDefs.td
+// - The mnemonics are either LLVM or builtin MLIR attributes and types, but
+// regular C++ types are also allowed to match builders and parsers.
+// - DIScopeAttr and DINodeAttr are empty base classes, custom encoding not
+// needed.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// DIBasicTypeAttr
+//===----------------------------------------------------------------------===//
+
+def DIBasicTypeAttr : DialectAttribute<(attr
+ VarInt:$tag,
+ String:$name,
+ VarInt:$sizeInBits,
+ VarInt:$encoding
+)>;
+
+//===----------------------------------------------------------------------===//
+// DIExpressionAttr, DIExpressionElemAttr
+//===----------------------------------------------------------------------===//
+
+def DIExpressionElemAttr : DialectAttribute<(attr
+ VarInt:$opcode,
+ OptionalArrayRef<"uint64_t">:$arguments
+)>;
+
+def DIExpressionAttr : DialectAttribute<(attr
+ OptionalArrayRef<"DIExpressionElemAttr">:$operations
+)>;
+
+//===----------------------------------------------------------------------===//
+// DIFileAttr
+//===----------------------------------------------------------------------===//
+
+def DIFileAttr : DialectAttribute<(attr
+ String:$name,
+ String:$directory
+)>;
+
+//===----------------------------------------------------------------------===//
+// DILocalVariableAttr
+//===----------------------------------------------------------------------===//
+
+def DILocalVariableAttr : DialectAttribute<(attr
+ Attr<"DIScopeAttr">:$scope,
+ OptionalAttribute<"StringAttr">:$name,
+ OptionalAttribute<"DIFileAttr">:$file,
+ VarInt:$line,
+ VarInt:$arg,
+ VarInt:$alignInBits,
+ OptionalAttribute<"DITypeAttr">:$type,
+ EnumClassFlag<"DIFlags", "getFlags()">:$_rawflags,
+ LocalVar<"DIFlags", "(DIFlags)_rawflags">:$flags
+)> {
+ // DILocalVariableAttr direct getter uses a `StringRef` for `name`. Since the
+ // more direct getter is prefered during bytecode reading, force the base one
+ // and prevent crashes for empty `StringAttr`.
+ let cBuilder = "$_resultType::get(context, $_args)";
+}
+
+//===----------------------------------------------------------------------===//
+// DISubroutineTypeAttr
+//===----------------------------------------------------------------------===//
+
+def DISubroutineTypeAttr : DialectAttribute<(attr
+ VarInt:$callingConvention,
+ OptionalArrayRef<"DITypeAttr">:$types
+)>;
+
+//===----------------------------------------------------------------------===//
+// DICompileUnitAttr
+//===----------------------------------------------------------------------===//
+
+def DICompileUnitAttr : DialectAttribute<(attr
+ Attr<"DistinctAttr">:$id,
+ VarInt:$sourceLanguage,
+ Attr<"DIFileAttr">:$file,
+ OptionalAttribute<"StringAttr">:$producer,
+ Bool:$isOptimized,
+ EnumClassFlag<"DIEmissionKind", "getEmissionKind()">:$_rawEmissionKind,
+ LocalVar<"DIEmissionKind", "(DIEmissionKind)_rawEmissionKind">:$emissionKind,
+ EnumClassFlag<"DINameTableKind", "getNameTableKind()">:$_rawNameTableKind,
+ LocalVar<"DINameTableKind",
+ "(DINameTableKind)_rawNameTableKind">:$nameTableKind
+)>;
+
+//===----------------------------------------------------------------------===//
+// DISubprogramAttr
+//===----------------------------------------------------------------------===//
+
+def DISubprogramAttr : DialectAttribute<(attr
+ OptionalAttribute<"DistinctAttr">:$recId,
+ Bool:$isRecSelf,
+ OptionalAttribute<"DistinctAttr">:$id,
+ OptionalAttribute<"DICompileUnitAttr">:$compileUnit,
+ OptionalAttribute<"DIScopeAttr">:$scope,
+ OptionalAttribute<"StringAttr">:$name,
+ OptionalAttribute<"StringAttr">:$linkageName,
+ OptionalAttribute<"DIFileAttr">:$file,
+ VarInt:$line,
+ VarInt:$scopeLine,
+ EnumClassFlag<"DISubprogramFlags", "getSubprogramFlags()">:$_rawflags,
+ LocalVar<"DISubprogramFlags", "(DISubprogramFlags)_rawflags">:$subprogramFlags,
+ OptionalAttribute<"DISubroutineTypeAttr">:$type,
+ OptionalArrayRef<"DINodeAttr">:$retainedNodes,
+ OptionalArrayRef<"DINodeAttr">:$annotations
+)>;
+
+//===----------------------------------------------------------------------===//
+// DICompositeTypeAttr
+//===----------------------------------------------------------------------===//
+
+def DICompositeTypeAttr : DialectAttribute<(attr
+ OptionalAttribute<"DistinctAttr">:$recId,
+ Bool:$isRecSelf,
+ VarInt:$tag,
+ OptionalAttribute<"StringAttr">:$name,
+ OptionalAttribute<"DIFileAttr">:$file,
+ VarInt:$line,
+ OptionalAttribute<"DIScopeAttr">:$scope,
+ OptionalAttribute<"DITypeAttr">:$baseType,
+ EnumClassFlag<"DIFlags", "getFlags()">:$_rawflags,
+ LocalVar<"DIFlags", "(DIFlags)_rawflags">:$flags,
+ VarInt:$sizeInBits,
+ VarInt:$alignInBits,
+ OptionalAttribute<"DIExpressionAttr">:$dataLocation,
+ OptionalAttribute<"DIExpressionAttr">:$rank,
+ OptionalAttribute<"DIExpressionAttr">:$allocated,
+ OptionalAttribute<"DIExpressionAttr">:$associated,
+ OptionalArrayRef<"DINodeAttr">:$elements
+)>;
+
+//===----------------------------------------------------------------------===//
+// DIDerivedTypeAttr
+//===----------------------------------------------------------------------===//
+
+def DIDerivedTypeAttr : DialectAttribute<(attr
+ VarInt:$tag,
+ OptionalAttribute<"StringAttr">:$name,
+ OptionalAttribute<"DITypeAttr">:$baseType,
+ VarInt:$sizeInBits,
+ VarInt:$alignInBits,
+ VarInt:$offsetInBits,
+ OptionalInt<"unsigned">:$dwarfAddressSpace,
+ OptionalAttribute<"DINodeAttr">:$extraData
+)>;
+
+//===----------------------------------------------------------------------===//
+// DIImportedEntityAttr
+//===----------------------------------------------------------------------===//
+
+def DIImportedEntityAttr : DialectAttribute<(attr
+ VarInt:$tag,
+ Attr<"DIScopeAttr">:$scope,
+ Attr<"DINodeAttr">:$entity,
+ OptionalAttribute<"DIFileAttr">:$file,
+ VarInt:$line,
+ OptionalAttribute<"StringAttr">:$name,
+ OptionalArrayRef<"DINodeAttr">:$elements
+)>;
+
+//===----------------------------------------------------------------------===//
+// DIGlobalVariableAttr, DIGlobalVariableExpressionAttr
+//===----------------------------------------------------------------------===//
+
+def DIGlobalVariableAttr : DialectAttribute<(attr
+ OptionalAttribute<"DIScopeAttr">:$scope,
+ OptionalAttribute<"StringAttr">:$name,
+ OptionalAttribute<"StringAttr">:$linkageName,
+ Attr<"DIFileAttr">:$file,
+ VarInt:$line,
+ Attr<"DITypeAttr">:$type,
+ Bool:$isLocalToUnit,
+ Bool:$isDefined,
+ VarInt:$alignInBits
+)>;
+
+def DIGlobalVariableExpressionAttr : DialectAttribute<(attr
+ Attr<"DIGlobalVariableAttr">:$var,
+ OptionalAttribute<"DIExpressionAttr">:$expr
+)>;
+
+//===----------------------------------------------------------------------===//
+// DILabelAttr
+//===----------------------------------------------------------------------===//
+
+def DILabelAttr : DialectAttribute<(attr
+ Attr<"DIScopeAttr">:$scope,
+ OptionalAttribute<"StringAttr">:$name,
+ OptionalAttribute<"DIFileAttr">:$file,
+ VarInt:$line
+)> {
+ // DILabelAttr direct getter uses a `StringRef` for `name`. Since the
+ // more direct getter is prefered during bytecode reading, force the base one
+ // and prevent crashes for empty `StringAttr`.
+ let cBuilder = "$_resultType::get(context, $_args)";
+}
+
+//===----------------------------------------------------------------------===//
+// DILexicalBlockAttr, DILexicalBlockFileAttr
+//===----------------------------------------------------------------------===//
+
+def DILexicalBlockAttr : DialectAttribute<(attr
+ Attr<"DIScopeAttr">:$scope,
+ OptionalAttribute<"DIFileAttr">:$file,
+ VarInt:$line,
+ VarInt:$column
+)>;
+
+def DILexicalBlockFileAttr : DialectAttribute<(attr
+ Attr<"DIScopeAttr">:$scope,
+ OptionalAttribute<"DIFileAttr">:$file,
+ VarInt:$discriminator
+)>;
+
+//===----------------------------------------------------------------------===//
+// DINamespaceAttr
+//===----------------------------------------------------------------------===//
+
+def DINamespaceAttr : DialectAttribute<(attr
+ OptionalAttribute<"StringAttr">:$name,
+ OptionalAttribute<"DIScopeAttr">:$scope,
+ Bool:$exportSymbols
+)>;
+
+//===----------------------------------------------------------------------===//
+// DISubrangeAttr
+//===----------------------------------------------------------------------===//
+
+def DISubrangeAttr : DialectAttribute<(attr
+ OptionalAttribute<"Attribute">:$count,
+ OptionalAttribute<"Attribute">:$lowerBound,
+ OptionalAttribute<"Attribute">:$upperBound,
+ OptionalAttribute<"Attribute">:$stride
+)>;
+
+//===----------------------------------------------------------------------===//
+// LoopAnnotationAttr
+//===----------------------------------------------------------------------===//
+
+def LoopAnnotationAttr : DialectAttribute<(attr
+ OptionalAttribute<"BoolAttr">:$disableNonforced,
+ OptionalAttribute<"LoopVectorizeAttr">:$vectorize,
+ OptionalAttribute<"LoopInterleaveAttr">:$interleave,
+ OptionalAttribute<"LoopUnrollAttr">:$unroll,
+ OptionalAttribute<"LoopUnrollAndJamAttr">:$unrollAndJam,
+ OptionalAttribute<"LoopLICMAttr">:$licm,
+ OptionalAttribute<"LoopDistributeAttr">:$distribute,
+ OptionalAttribute<"LoopPipelineAttr">:$pipeline,
+ OptionalAttribute<"LoopPeeledAttr">:$peeled,
+ OptionalAttribute<"LoopUnswitchAttr">:$unswitch,
+ OptionalAttribute<"BoolAttr">:$mustProgress,
+ OptionalAttribute<"BoolAttr">:$isVectorized,
+ OptionalAttribute<"FusedLoc">:$startLoc,
+ OptionalAttribute<"FusedLoc">:$endLoc,
+ OptionalArrayRef<"AccessGroupAttr">:$parallelAccesses
+)>;
+
+//===----------------------------------------------------------------------===//
+// Attributes & Types with custom bytecode handling.
+//===----------------------------------------------------------------------===//
+
+// All the attributes with custom bytecode handling.
+def LLVMDialectAttributes : DialectAttributes<"LLVM"> {
+ let elems = [
+ DIBasicTypeAttr,
+ DICompileUnitAttr,
+ DICompositeTypeAttr,
+ DIDerivedTypeAttr,
+ DIExpressionElemAttr,
+ DIExpressionAttr,
+ DIFileAttr,
+ DIGlobalVariableAttr,
+ DIGlobalVariableExpressionAttr,
+ DIImportedEntityAttr,
+ DILabelAttr,
+ DILexicalBlockAttr,
+ DILexicalBlockFileAttr,
+ DILocalVariableAttr,
+ DINamespaceAttr,
+ DISubprogramAttr,
+ DISubrangeAttr,
+ DISubroutineTypeAttr,
+ LoopAnnotationAttr
+ // Referenced attributes currently missing support:
+ // AccessGroupAttr, LoopVectorizeAttr, LoopInterleaveAttr, LoopUnrollAttr,
+ // LoopUnrollAndJamAttr, LoopLICMAttr, LoopDistributeAttr, LoopPipelineAttr,
+ // LoopPeeledAttr, LoopUnswitchAttr
+ ];
+}
+
+def LLVMDialectTypes : DialectTypes<"LLVM"> {
+ let elems = [];
+}
+
+#endif // LLVM_DIALECT_BYTECODE
diff --git a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt
index ec581ac7277e..cc66face1c00 100644
--- a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt
+++ b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt
@@ -8,11 +8,13 @@ add_mlir_dialect_library(MLIRLLVMDialect
IR/LLVMMemorySlot.cpp
IR/LLVMTypes.cpp
IR/LLVMTypeSyntax.cpp
+ IR/LLVMDialectBytecode.cpp
ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/LLVMIR
DEPENDS
+ MLIRLLVMDialectBytecodeIncGen
MLIRLLVMOpsIncGen
MLIRLLVMTypesIncGen
MLIRLLVMIntrinsicOpsIncGen
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
index 5d08cccb4faa..7ca09d9c943e 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
@@ -29,6 +29,8 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Error.h"
+#include "LLVMDialectBytecode.h"
+
#include <numeric>
#include <optional>
@@ -4237,6 +4239,7 @@ void LLVMDialect::initialize() {
// Support unknown operations because not all LLVM operations are registered.
allowUnknownOperations();
declarePromisedInterface<DialectInlinerInterface, LLVMDialect>();
+ detail::addBytecodeInterface(this);
}
#define GET_OP_CLASSES
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialectBytecode.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialectBytecode.cpp
new file mode 100644
index 000000000000..41d1f80580cf
--- /dev/null
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialectBytecode.cpp
@@ -0,0 +1,154 @@
+//===- LLVMDialectBytecode.cpp - LLVM Bytecode Implementation -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLVMDialectBytecode.h"
+#include "mlir/Bytecode/BytecodeImplementation.h"
+#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
+#include "mlir/IR/Diagnostics.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/TypeSwitch.h"
+#include <type_traits>
+
+using namespace mlir;
+using namespace mlir::LLVM;
+
+namespace {
+
+// Provide some forward declarations of the functions that will be generated by
+// the include below.
+static void write(DIExpressionElemAttr attribute,
+ DialectBytecodeWriter &writer);
+static LogicalResult writeAttribute(Attribute attribute,
+ DialectBytecodeWriter &writer);
+
+//===--------------------------------------------------------------------===//
+// Optional ArrayRefs
+//
+// Note that both the writer and reader functions consider attributes to be
+// optional. This is because the attribute may be present or empty.
+//===--------------------------------------------------------------------===//
+
+template <class EntryTy>
+static void writeOptionalArrayRef(DialectBytecodeWriter &writer,
+ ArrayRef<EntryTy> storage) {
+ if (storage.empty()) {
+ writer.writeOwnedBool(false);
+ return;
+ }
+
+ writer.writeOwnedBool(true);
+ writer.writeList(storage, [&](EntryTy val) {
+ if constexpr (std::is_base_of_v<Attribute, EntryTy>) {
+ (void)writer.writeOptionalAttribute(val);
+ } else if constexpr (std::is_integral_v<EntryTy>) {
+ (void)writer.writeVarInt(val);
+ } else {
+ static_assert(true, "EntryTy not supported");
+ }
+ });
+}
+
+template <class EntryTy>
+static LogicalResult readOptionalArrayRef(DialectBytecodeReader &reader,
+ SmallVectorImpl<EntryTy> &storage) {
+ bool isPresent = false;
+ if (failed(reader.readBool(isPresent)))
+ return failure();
+ // Nothing to do here, the array is empty.
+ if (!isPresent)
+ return success();
+
+ auto readEntry = [&]() -> FailureOr<EntryTy> {
+ EntryTy temp;
+ if constexpr (std::is_base_of_v<Attribute, EntryTy>) {
+ if (succeeded(reader.readOptionalAttribute(temp)))
+ return temp;
+ } else if constexpr (std::is_integral_v<EntryTy>) {
+ if (succeeded(reader.readVarInt(temp)))
+ return temp;
+ } else {
+ static_assert(true, "EntryTy not supported");
+ }
+ return failure();
+ };
+
+ return reader.readList(storage, readEntry);
+}
+
+//===--------------------------------------------------------------------===//
+// Optional integral types
+//===--------------------------------------------------------------------===//
+
+template <class EntryTy>
+static void writeOptionalInt(DialectBytecodeWriter &writer,
+ std::optional<EntryTy> storage) {
+ static_assert(std::is_integral_v<EntryTy>,
+ "EntryTy must be an integral type");
+ EntryTy val = storage.value_or(0);
+ writer.writeVarIntWithFlag(val, storage.has_value());
+}
+
+template <class EntryTy>
+static LogicalResult readOptionalInt(DialectBytecodeReader &reader,
+ std::optional<EntryTy> &storage) {
+ static_assert(std::is_integral_v<EntryTy>,
+ "EntryTy must be an integral type");
+ uint64_t result = 0;
+ bool flag = false;
+ if (failed(reader.readVarIntWithFlag(result, flag)))
+ return failure();
+ if (flag)
+ storage = static_cast<EntryTy>(result);
+ else
+ storage = std::nullopt;
+ return success();
+}
+
+//===--------------------------------------------------------------------===//
+// Tablegen generated bytecode functions
+//===--------------------------------------------------------------------===//
+
+#include "mlir/Dialect/LLVMIR/LLVMDialectBytecode.cpp.inc"
+
+//===--------------------------------------------------------------------===//
+// LLVMDialectBytecodeInterface
+//===--------------------------------------------------------------------===//
+
+/// This class implements the bytecode interface for the LLVM dialect.
+struct LLVMDialectBytecodeInterface : public BytecodeDialectInterface {
+ LLVMDialectBytecodeInterface(Dialect *dialect)
+ : BytecodeDialectInterface(dialect) {}
+
+ // Attributes
+ Attribute readAttribute(DialectBytecodeReader &reader) const override {
+ return ::readAttribute(getContext(), reader);
+ }
+
+ LogicalResult writeAttribute(Attribute attr,
+ DialectBytecodeWriter &writer) const override {
+ return ::writeAttribute(attr, writer);
+ }
+
+ // Types
+ Type readType(DialectBytecodeReader &reader) const override {
+ return ::readType(getContext(), reader);
+ }
+
+ LogicalResult writeType(Type type,
+ DialectBytecodeWriter &writer) const override {
+ return ::writeType(type, writer);
+ }
+};
+} // namespace
+
+void LLVM::detail::addBytecodeInterface(LLVMDialect *dialect) {
+ dialect->addInterfaces<LLVMDialectBytecodeInterface>();
+}
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialectBytecode.h b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialectBytecode.h
new file mode 100644
index 000000000000..1a17cb462ccf
--- /dev/null
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialectBytecode.h
@@ -0,0 +1,27 @@
+//===- LLVMDialectBytecode.h - LLVM Bytecode Implementation -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines hooks into the LLVM dialect bytecode
+// implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_MLIR_DIALECT_LLVM_IR_LLVMDIALECTBYTECODE_H
+#define LIB_MLIR_DIALECT_LLVM_IR_LLVMDIALECTBYTECODE_H
+
+namespace mlir::LLVM {
+class LLVMDialect;
+
+namespace detail {
+/// Add the interfaces necessary for encoding the LLVM dialect components in
+/// bytecode.
+void addBytecodeInterface(LLVMDialect *dialect);
+} // namespace detail
+} // namespace mlir::LLVM
+
+#endif // LIB_MLIR_DIALECT_LLVM_IR_LLVMDIALECTBYTECODE_H
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
index 36c498e8b849..f77784abaf0b 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
@@ -161,11 +161,24 @@ XeGPUBlockingPass::getTileShape(Operation *op) const {
xegpu::UpdateOffsetOp, xegpu::LoadMatrixOp>(op))
return getTileShape(op->getOpResult(0));
if (isa<xegpu::PrefetchNdOp, xegpu::LoadNdOp, xegpu::PrefetchOp,
- xegpu::LoadGatherOp, xegpu::StoreMatrixOp>(op))
+ xegpu::StoreMatrixOp>(op))
return getTileShape(op->getOpOperand(0));
- if (isa<xegpu::StoreNdOp, xegpu::StoreScatterOp>(op))
+ if (isa<xegpu::StoreNdOp>(op))
return getTileShape(op->getOpOperand(1));
+ // Handle LoadGatherOp and StoreScatterOp (with and without offset)
+ if (auto loadGatherOp = dyn_cast<xegpu::LoadGatherOp>(op)) {
+ if (loadGatherOp.getOffsets())
+ return getTileShape(loadGatherOp->getOpResult(0));
+ else
+ return getTileShape(loadGatherOp->getOpOperand(0));
+ }
+
+ if (auto storeScatterOp = dyn_cast<xegpu::StoreScatterOp>(op))
+ return getTileShape(storeScatterOp.getOffsets()
+ ? storeScatterOp->getOpOperand(0)
+ : storeScatterOp->getOpOperand(1));
+
if (isa<xegpu::DpasOp>(op)) {
std::optional<SmallVector<int64_t>> aTile =
getTileShape(op->getOpOperand(0));
diff --git a/mlir/test/Dialect/LLVMIR/bytecode.mlir b/mlir/test/Dialect/LLVMIR/bytecode.mlir
new file mode 100644
index 000000000000..821b0ac2196a
--- /dev/null
+++ b/mlir/test/Dialect/LLVMIR/bytecode.mlir
@@ -0,0 +1,35 @@
+// RUN: mlir-opt -verify-roundtrip %s
+
+#access_group = #llvm.access_group<id = distinct[0]<>>
+#access_group1 = #llvm.access_group<id = distinct[1]<>>
+#di_subprogram = #llvm.di_subprogram<recId = distinct[2]<>>
+#loc1 = loc("test.f90":12:14)
+#loc2 = loc("test":4:3)
+#loc6 = loc(fused<#di_subprogram>[#loc1])
+#loc7 = loc(fused<#di_subprogram>[#loc2])
+#loop_annotation = #llvm.loop_annotation<disableNonforced = false, mustProgress = true, startLoc = #loc6, endLoc = #loc7, parallelAccesses = #access_group, #access_group1>
+module {
+ llvm.func @imp_fn() {
+ llvm.return loc(#loc2)
+ } loc(#loc8)
+ llvm.func @loop_annotation_with_locs() {
+ llvm.br ^bb1 {loop_annotation = #loop_annotation} loc(#loc4)
+ ^bb1: // pred: ^bb0
+ llvm.return loc(#loc5)
+ } loc(#loc3)
+} loc(#loc)
+#di_file = #llvm.di_file<"test.f90" in "">
+#di_subroutine_type = #llvm.di_subroutine_type<callingConvention = DW_CC_program>
+#loc = loc("test":0:0)
+#loc3 = loc("test-path":36:3)
+#loc4 = loc("test-path":37:5)
+#loc5 = loc("test-path":39:5)
+#di_compile_unit = #llvm.di_compile_unit<id = distinct[3]<>, sourceLanguage = DW_LANG_Fortran95, file = #di_file, isOptimized = false, emissionKind = Full>
+#di_compile_unit1 = #llvm.di_compile_unit<id = distinct[4]<>, sourceLanguage = DW_LANG_Fortran95, file = #di_file, isOptimized = false, emissionKind = Full>
+#di_compile_unit2 = #llvm.di_compile_unit<id = distinct[5]<>, sourceLanguage = DW_LANG_Fortran95, file = #di_file, isOptimized = false, emissionKind = Full>
+#di_module = #llvm.di_module<file = #di_file, scope = #di_compile_unit1, name = "mod1">
+#di_module1 = #llvm.di_module<file = #di_file, scope = #di_compile_unit2, name = "mod2">
+#di_imported_entity = #llvm.di_imported_entity<tag = DW_TAG_imported_module, scope = #di_subprogram, entity = #di_module, file = #di_file, line = 1>
+#di_imported_entity1 = #llvm.di_imported_entity<tag = DW_TAG_imported_module, scope = #di_subprogram, entity = #di_module1, file = #di_file, line = 1>
+#di_subprogram1 = #llvm.di_subprogram<recId = distinct[2]<>, id = distinct[6]<>, compileUnit = #di_compile_unit, scope = #di_file, name = "imp_fn", file = #di_file, subprogramFlags = Definition, type = #di_subroutine_type, retainedNodes = #di_imported_entity, #di_imported_entity1>
+#loc8 = loc(fused<#di_subprogram1>[#loc1])
diff --git a/mlir/test/Dialect/LLVMIR/debuginfo.mlir b/mlir/test/Dialect/LLVMIR/debuginfo.mlir
index 1834b0a52470..d7bf99bfaed7 100644
--- a/mlir/test/Dialect/LLVMIR/debuginfo.mlir
+++ b/mlir/test/Dialect/LLVMIR/debuginfo.mlir
@@ -1,4 +1,5 @@
// RUN: mlir-opt %s | mlir-opt | FileCheck %s
+// RUN: mlir-opt -emit-bytecode %s | mlir-opt | FileCheck %s
// CHECK-DAG: #[[FILE:.*]] = #llvm.di_file<"debuginfo.mlir" in "/test/">
#file = #llvm.di_file<"debuginfo.mlir" in "/test/">
diff --git a/mlir/test/Dialect/LLVMIR/roundtrip.mlir b/mlir/test/Dialect/LLVMIR/roundtrip.mlir
index 73447978341d..00e763a8ffc0 100644
--- a/mlir/test/Dialect/LLVMIR/roundtrip.mlir
+++ b/mlir/test/Dialect/LLVMIR/roundtrip.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s | mlir-opt | FileCheck %s
+// RUN: mlir-opt -verify-roundtrip %s
// CHECK-LABEL: func @baz
@@ -757,7 +757,7 @@ llvm.func @stackrestore(%arg0: !llvm.ptr) {
// CHECK-LABEL: @experimental_noalias_scope_decl
llvm.func @experimental_noalias_scope_decl() {
- // CHECK: llvm.intr.experimental.noalias.scope.decl #{{.*}}
+ // CHECK: llvm.intr.experimental.noalias.scope.decl #alias_scope{{.*}}
llvm.intr.experimental.noalias.scope.decl #alias_scope
llvm.return
}
@@ -767,7 +767,7 @@ llvm.func @experimental_noalias_scope_decl() {
// CHECK-LABEL: @experimental_noalias_scope_with_string_id
llvm.func @experimental_noalias_scope_with_string_id() {
- // CHECK: llvm.intr.experimental.noalias.scope.decl #{{.*}}
+ // CHECK: llvm.intr.experimental.noalias.scope.decl #alias_scope{{.*}}
llvm.intr.experimental.noalias.scope.decl #alias_scope2
llvm.return
}
diff --git a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
index 9d63c2ddd489..fe4f44c0b02a 100644
--- a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
+++ b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
@@ -584,3 +584,101 @@ gpu.module @test_kernel {
gpu.return
}
}
+
+// -----
+gpu.module @test_kernel {
+ // CHECK-LABEL: load_with_offsets
+ // CHECK-COUNT-2: xegpu.load {{.*}}[{{.*}}], {{.*}} <{chunk_size = 1 : i64, l1_hint = #xegpu.cache_hint<cached>}> : ui64, vector<16xindex>, vector<16xi1> -> vector<16xf32>
+ gpu.func @load_with_offsets(%src: ui64) -> vector<32xf32> {
+ %cst = arith.constant dense<[
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 64, 72, 80, 88, 96, 104, 112, 120,
+ 128, 136, 144, 152, 160, 168, 176, 184,
+ 192, 200, 208, 216, 224, 232, 240, 248
+ ]> : vector<32xindex>
+
+ %c17 = arith.constant 17: index
+ %mask = vector.create_mask %c17: vector<32xi1>
+ %ld = xegpu.load %src[%cst], %mask {chunk_size = 1, layout_result_0 = #xegpu.layout<inst_data = [16]>, l1_hint = #xegpu.cache_hint<cached>} : ui64, vector<32xindex>, vector<32xi1> -> vector<32xf32>
+
+ gpu.return %ld : vector<32xf32>
+ }
+}
+
+// -----
+gpu.module @test_kernel {
+ // CHECK-LABEL: store_with_offsets
+ // CHECK-COUNT-2: xegpu.store {{.*}}[{{.*}}], {{.*}} <{chunk_size = 1 : i64, l1_hint = #xegpu.cache_hint<cached>}> : vector<16xf32>, ui64, vector<16xindex>, vector<16xi1>
+ gpu.func @store_with_offsets(%src: ui64) {
+ %cst = arith.constant dense<[
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 64, 72, 80, 88, 96, 104, 112, 120,
+ 128, 136, 144, 152, 160, 168, 176, 184,
+ 192, 200, 208, 216, 224, 232, 240, 248
+ ]> : vector<32xindex>
+
+ %c17 = arith.constant 17: index
+ %mask = vector.create_mask %c17: vector<32xi1>
+
+ %st_vec = arith.constant dense<1023.0>: vector<32xf32>
+ xegpu.store %st_vec, %src[%cst], %mask {chunk_size = 1, layout_operand_0 = #xegpu.layout<inst_data = [16]>,
+ layout_operand_2 = #xegpu.layout<inst_data = [16]>,
+ layout_operand_3 = #xegpu.layout<inst_data = [16]>,
+ l1_hint = #xegpu.cache_hint<cached>} : vector<32xf32>, ui64, vector<32xindex>, vector<32xi1>
+
+ gpu.return
+ }
+}
+
+// -----
+gpu.module @test_kernel {
+ // CHECK-LABEL: load_with_offsets_chunk
+ // CHECK: [[cst:%.+]] = arith.constant dense<0.000000e+00> : vector<32x4xf32>
+ // CHECK: [[cst0:%.+]] = arith.constant dense<[130, 138, 146, 154, 162, 170, 178, 186, 194, 202, 210, 218, 226, 234, 242, 250]> : vector<16xindex>
+ // CHECK: [[cst1:%.+]] = arith.constant dense<[2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106, 114, 122]> : vector<16xindex>
+ // CHECK: [[cst2:%.+]] = arith.constant dense<[128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248]> : vector<16xindex>
+ // CHECK: [[cst3:%.+]] = arith.constant dense<[0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120]> : vector<16xindex>
+ // CHECK-COUNT-4: xegpu.load {{.*}}[{{.*}}], {{.*}} <{chunk_size = 2 : i64, l1_hint = #xegpu.cache_hint<cached>}> : ui64, vector<16xindex>, vector<16xi1> -> vector<16x2xf32>
+ gpu.func @load_with_offsets_chunk(%src: ui64) -> vector<32x4xf32> {
+ %cst = arith.constant dense<[
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 64, 72, 80, 88, 96, 104, 112, 120,
+ 128, 136, 144, 152, 160, 168, 176, 184,
+ 192, 200, 208, 216, 224, 232, 240, 248
+ ]> : vector<32xindex>
+
+ %c17 = arith.constant 17: index
+ %mask = vector.create_mask %c17: vector<32xi1>
+ %ld = xegpu.load %src[%cst], %mask {chunk_size = 4, layout_result_0 = #xegpu.layout<inst_data = [16, 2]>, l1_hint = #xegpu.cache_hint<cached>} : ui64, vector<32xindex>, vector<32xi1> -> vector<32x4xf32>
+ gpu.return %ld : vector<32x4xf32>
+ }
+}
+
+// -----
+gpu.module @test_kernel {
+ // CHECK-LABEL: store_with_offsets_chunk
+ // CHECK: [[cst:%.+]] = arith.constant dense<1.023000e+03> : vector<16x2xf32
+ // CHECK: [[cst0:%.+]] = arith.constant dense<[130, 138, 146, 154, 162, 170, 178, 186, 194, 202, 210, 218, 226, 234, 242, 250]> : vector<16xindex>
+ // CHECK: [[cst1:%.+]] = arith.constant dense<[2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106, 114, 122]> : vector<16xindex>
+ // CHECK: [[cst2:%.+]] = arith.constant dense<[128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248]> : vector<16xindex>
+ // CHECK: [[cst3:%.+]] = arith.constant dense<[0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120]> : vector<16xindex>
+ // CHECK-COUNT-4: xegpu.store {{.*}}[{{.*}}], {{.*}} <{chunk_size = 2 : i64, l1_hint = #xegpu.cache_hint<cached>}> : vector<16x2xf32>, ui64, vector<16xindex>, vector<16xi1>
+ gpu.func @store_with_offsets_chunk(%src: ui64) {
+ %cst = arith.constant dense<[
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 64, 72, 80, 88, 96, 104, 112, 120,
+ 128, 136, 144, 152, 160, 168, 176, 184,
+ 192, 200, 208, 216, 224, 232, 240, 248
+ ]> : vector<32xindex>
+
+ %c17 = arith.constant 17: index
+ %mask = vector.create_mask %c17: vector<32xi1>
+
+ %st_vec = arith.constant dense<1023.>: vector<32x4xf32>
+ xegpu.store %st_vec, %src[%cst], %mask {chunk_size = 4, layout_operand_0 = #xegpu.layout<inst_data = [16, 2]>,
+ layout_operand_2 = #xegpu.layout<inst_data = [16, 2]>,
+ layout_operand_3 = #xegpu.layout<inst_data = [16, 2]>,
+ l1_hint = #xegpu.cache_hint<cached>} : vector<32x4xf32>, ui64, vector<32xindex>, vector<32xi1>
+ gpu.return
+ }
+}