summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Krzysztof Parzyszek <Krzysztof.Parzyszek@amd.com> 2025-07-17 17:12:43 +0000
committer Aiden Grossman <aidengrossman@google.com> 2025-07-17 17:12:43 +0000
commit 0b56fc832b3c44d5cbfe58575bf10e73432ac971 (patch)
tree 379c3bdfedde61f1900ee1a89c5be6ebe5ab98f1
parent a1179b69528245aaca7afa0c60bf9a8dc1ad3e6c (diff)
parent ff5784bb9094f6035851dc7abc4a5760fdc21e45 (diff)
[𝘀𝗽𝗿] changes introduced through rebase users/boomanaiden154/main.ci-migrate-monolithic-linux-script-to-sccache
Created using spr 1.3.4 [skip ci]
-rw-r--r-- clang-tools-extra/README.txt 9
-rw-r--r-- clang/include/clang/Basic/LangOptions.h 5
-rw-r--r-- clang/include/clang/Driver/Options.td 13
-rw-r--r-- clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp 14
-rw-r--r-- clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp 3
-rw-r--r-- clang/lib/CIR/CodeGen/CIRGenValue.h 6
-rw-r--r-- clang/lib/CodeGen/Targets/X86.cpp 32
-rw-r--r-- clang/lib/Driver/ToolChains/Clang.cpp 9
-rw-r--r-- clang/lib/Frontend/CompilerInvocation.cpp 12
-rw-r--r-- clang/lib/Frontend/InitPreprocessor.cpp 12
-rw-r--r-- clang/test/CIR/CodeGen/complex-builtins.cpp 36
-rw-r--r-- clang/test/CodeGen/X86/i128-debuginfo.c 10
-rw-r--r-- clang/test/CodeGen/X86/x86_64-arguments.c 39
-rw-r--r-- clang/test/CodeGen/alloc-align-attr.c 58
-rw-r--r-- clang/test/CodeGen/builtins.c 18
-rw-r--r-- clang/test/CodeGen/ext-int-cc.c 4
-rw-r--r-- clang/test/CodeGen/extend-arg-64.c 2
-rw-r--r-- clang/test/Driver/openacc.c 12
-rw-r--r-- clang/test/Preprocessor/openacc.c 8
-rw-r--r-- flang/lib/Lower/OpenACC.cpp 122
-rw-r--r-- flang/lib/Lower/OpenMP/OpenMP.cpp 84
-rw-r--r-- flang/lib/Lower/OpenMP/Utils.cpp 84
-rw-r--r-- flang/lib/Lower/OpenMP/Utils.h 3
-rw-r--r-- flang/test/Lower/OpenACC/acc-host-data-unwrap-defaultbounds.f90 14
-rw-r--r-- flang/test/Lower/OpenACC/acc-host-data.f90 21
-rw-r--r-- flang/test/Lower/OpenACC/acc-routine.f90 7
-rw-r--r-- flang/test/Lower/OpenACC/acc-routine03.f90 2
-rw-r--r-- flang/test/Lower/OpenACC/acc-use-device.f90 61
-rw-r--r-- libc/test/src/math/cospif_test.cpp 2
-rw-r--r-- libc/test/src/math/sincosf_test.cpp 2
-rw-r--r-- libc/test/src/math/sinpif_test.cpp 2
-rw-r--r-- lldb/source/Plugins/Process/minidump/MinidumpParser.cpp 34
-rw-r--r-- lldb/source/Plugins/Process/minidump/MinidumpParser.h 3
-rw-r--r-- lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp 9
-rw-r--r-- lldb/test/Shell/Minidump/missing-memory-region.yaml 42
-rw-r--r-- lldb/test/Shell/Settings/TestChildCountTruncation.test 2
-rw-r--r-- lldb/unittests/Process/minidump/MinidumpParserTest.cpp 23
-rw-r--r-- llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h 1
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 87
-rw-r--r-- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp 265
-rw-r--r-- llvm/lib/Target/AArch64/AArch64InstrInfo.h 4
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp 66
-rw-r--r-- llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp 23
-rw-r--r-- llvm/test/CodeGen/AArch64/aarch64-combine-gather-lanes.mir 364
-rw-r--r-- llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll 134
-rw-r--r-- llvm/test/CodeGen/AArch64/concat-vector.ll 5
-rw-r--r-- llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll 50
-rw-r--r-- llvm/test/CodeGen/AArch64/fsh.ll 113
-rw-r--r-- llvm/test/CodeGen/AArch64/llvm.frexp.ll 14
-rw-r--r-- llvm/test/CodeGen/AArch64/neon-dotreduce.ll 345
-rw-r--r-- llvm/test/CodeGen/AArch64/nontemporal.ll 48
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll 88
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll 30
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll 88
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll 45
-rw-r--r-- llvm/test/TableGen/directive1.td 25
-rw-r--r-- llvm/test/TableGen/directive2.td 25
-rw-r--r-- llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll 4
-rw-r--r-- llvm/unittests/Frontend/OpenMPDirectiveNameParserTest.cpp 21
-rw-r--r-- llvm/utils/TableGen/Basic/DirectiveEmitter.cpp 22
-rw-r--r-- mlir/include/mlir/Analysis/Presburger/IntegerRelation.h 13
-rw-r--r-- mlir/include/mlir/Dialect/OpenACC/OpenACC.h 1
-rw-r--r-- mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td 12
-rw-r--r-- mlir/include/mlir/IR/PatternMatch.h 19
-rw-r--r-- mlir/lib/Analysis/Presburger/IntegerRelation.cpp 38
-rw-r--r-- mlir/lib/Bindings/Python/IRCore.cpp 22
-rw-r--r-- mlir/lib/Bindings/Python/IRModule.h 14
-rw-r--r-- mlir/lib/Bindings/Python/Pass.cpp 12
-rw-r--r-- mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp 112
-rw-r--r-- mlir/lib/IR/CMakeLists.txt 1
-rw-r--r-- mlir/lib/IR/PatternLoggingListener.cpp 50
-rw-r--r-- mlir/lib/Rewrite/PatternApplicator.cpp 16
-rw-r--r-- mlir/python/mlir/_mlir_libs/_mlir/ir.pyi 7
-rw-r--r-- mlir/python/mlir/_mlir_libs/_mlir/passmanager.pyi 1
-rw-r--r-- mlir/test/IR/test-pattern-logging-listener.mlir 17
-rw-r--r-- mlir/test/lit.cfg.py 11
-rw-r--r-- mlir/test/python/ir/operation.py 9
-rw-r--r-- mlir/test/python/pass_manager.py 57
-rw-r--r-- mlir/unittests/Analysis/Presburger/IntegerRelationTest.cpp 94
-rw-r--r-- mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp 44
80 files changed, 2340 insertions, 896 deletions
diff --git a/clang-tools-extra/README.txt b/clang-tools-extra/README.txt
index 6891e4078997..1195db9b468d 100644
--- a/clang-tools-extra/README.txt
+++ b/clang-tools-extra/README.txt
@@ -8,12 +8,13 @@ Clang frontend. These tools are kept in a separate "extra" repository to
allow lighter weight checkouts of the core Clang codebase.
All discussion regarding Clang, Clang-based tools, and code in this repository
-should be held using the standard Clang forum:
+should be held using the standard Clang forums:
https://discourse.llvm.org/c/clang
+ https://discourse.llvm.org/c/clang/clang-tidy/71
+ https://discourse.llvm.org/c/clang/clangd/34
-Code review for this tree should take place on the standard Clang patch and
-commit lists:
- http://lists.llvm.org/mailman/listinfo/cfe-commits
+Code review for this tree should take place on GitHub:
+ https://github.com/llvm/llvm-project/pulls?q=label%3Aclang-tools-extra
If you find a bug in these tools, please file it in the LLVM bug tracker:
https://github.com/llvm/llvm-project/issues/
diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
index 937cbff4e3ea..0407897359b5 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -633,11 +633,6 @@ public:
// received as a result of a standard operator new (-fcheck-new)
bool CheckNew = false;
- // In OpenACC mode, contains a user provided override for the _OPENACC macro.
- // This exists so that we can override the macro value and test our incomplete
- // implementation on real-world examples.
- std::string OpenACCMacroOverride;
-
/// The HLSL root signature version for dxil.
llvm::dxbc::RootSignatureVersion HLSLRootSigVer =
llvm::dxbc::RootSignatureVersion::V1_1;
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index a8c1b5dd8ab3..6c22f06b269f 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1422,19 +1422,6 @@ def fno_hip_emit_relocatable : Flag<["-"], "fno-hip-emit-relocatable">,
HelpText<"Do not override toolchain to compile HIP source to relocatable">;
}
-// Clang specific/exclusive options for OpenACC.
-def openacc_macro_override
- : Separate<["-"], "fexperimental-openacc-macro-override">,
- Visibility<[ClangOption, CC1Option]>,
- Group<f_Group>,
- HelpText<"Overrides the _OPENACC macro value for experimental testing "
- "during OpenACC support development">;
-def openacc_macro_override_EQ
- : Joined<["-"], "fexperimental-openacc-macro-override=">,
- Alias<openacc_macro_override>;
-
-// End Clang specific/exclusive options for OpenACC.
-
def libomptarget_amdgpu_bc_path_EQ : Joined<["--"], "libomptarget-amdgpu-bc-path=">, Group<i_Group>,
HelpText<"Path to libomptarget-amdgcn bitcode library">;
def libomptarget_amdgcn_bc_path_EQ : Joined<["--"], "libomptarget-amdgcn-bc-path=">, Group<i_Group>,
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 476f99495928..61d1c54ee9ec 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -125,7 +125,7 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
mlir::Value real = emitScalarExpr(e->getArg(0));
mlir::Value imag = emitScalarExpr(e->getArg(1));
mlir::Value complex = builder.createComplexCreate(loc, real, imag);
- return RValue::get(complex);
+ return RValue::getComplex(complex);
}
case Builtin::BI__builtin_creal:
@@ -150,6 +150,18 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
return RValue::get(imag);
}
+ case Builtin::BI__builtin_conj:
+ case Builtin::BI__builtin_conjf:
+ case Builtin::BI__builtin_conjl:
+ case Builtin::BIconj:
+ case Builtin::BIconjf:
+ case Builtin::BIconjl: {
+ mlir::Value complex = emitComplexExpr(e->getArg(0));
+ mlir::Value conj = builder.createUnaryOp(getLoc(e->getExprLoc()),
+ cir::UnaryOpKind::Not, complex);
+ return RValue::getComplex(conj);
+ }
+
case Builtin::BI__builtin_clrsb:
case Builtin::BI__builtin_clrsbl:
case Builtin::BI__builtin_clrsbll:
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp
index 6663f5ea1e75..9f36be5397ad 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp
@@ -231,8 +231,7 @@ mlir::Value ComplexExprEmitter::VisitBinComma(const BinaryOperator *e) {
mlir::Value ComplexExprEmitter::VisitCallExpr(const CallExpr *e) {
if (e->getCallReturnType(cgf.getContext())->isReferenceType())
return emitLoadOfLValue(e);
-
- return cgf.emitCallExpr(e).getValue();
+ return cgf.emitCallExpr(e).getComplexValue();
}
mlir::Value ComplexExprEmitter::VisitCastExpr(CastExpr *e) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenValue.h b/clang/lib/CIR/CodeGen/CIRGenValue.h
index 0a6dba5e80a6..0832c4141a10 100644
--- a/clang/lib/CIR/CodeGen/CIRGenValue.h
+++ b/clang/lib/CIR/CodeGen/CIRGenValue.h
@@ -58,6 +58,12 @@ public:
return value;
}
+ /// Return the value of this complex value.
+ mlir::Value getComplexValue() const {
+ assert(isComplex() && "Not a complex!");
+ return value;
+ }
+
/// Return the value of the address of the aggregate.
Address getAggregateAddress() const {
assert(isAggregate() && "Not an aggregate!");
diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp
index 0b712ac2dabc..abb91486e7ee 100644
--- a/clang/lib/CodeGen/Targets/X86.cpp
+++ b/clang/lib/CodeGen/Targets/X86.cpp
@@ -2470,13 +2470,12 @@ GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset,
return llvm::Type::getDoubleTy(getVMContext());
}
-
/// GetINTEGERTypeAtOffset - The ABI specifies that a value should be passed in
-/// an 8-byte GPR. This means that we either have a scalar or we are talking
-/// about the high or low part of an up-to-16-byte struct. This routine picks
-/// the best LLVM IR type to represent this, which may be i64 or may be anything
-/// else that the backend will pass in a GPR that works better (e.g. i8, %foo*,
-/// etc).
+/// one or more 8-byte GPRs. This means that we either have a scalar or we are
+/// talking about the high and/or low part of an up-to-16-byte struct. This
+/// routine picks the best LLVM IR type to represent this, which may be i64 or
+/// may be anything else that the backend will pass in GPRs that works better
+/// (e.g. i8, %foo*, etc).
///
/// PrefType is an LLVM IR type that corresponds to (part of) the IR type for
/// the source type. IROffset is an offset in bytes into the LLVM IR type that
@@ -2534,6 +2533,13 @@ GetINTEGERTypeAtOffset(llvm::Type *IRType, unsigned IROffset,
SourceOffset);
}
+ // If we have a 128-bit integer, we can pass it safely as an i128, so
+ // return it unchanged.
+ if (IRType->isIntegerTy(128)) {
+ assert(IROffset == 0);
+ return IRType;
+ }
+
// Okay, we don't have any better idea of what to pass, so we pass this in an
// integer register that isn't too big to fit the rest of the struct.
unsigned TySizeInBytes =
@@ -2591,8 +2597,7 @@ GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi,
return Result;
}
-ABIArgInfo X86_64ABIInfo::
-classifyReturnType(QualType RetTy) const {
+ABIArgInfo X86_64ABIInfo::classifyReturnType(QualType RetTy) const {
// AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the
// classification algorithm.
X86_64ABIInfo::Class Lo, Hi;
@@ -2638,6 +2643,12 @@ classifyReturnType(QualType RetTy) const {
isPromotableIntegerTypeForABI(RetTy))
return ABIArgInfo::getExtend(RetTy);
}
+
+ if (ResType->isIntegerTy(128)) {
+ // i128 values are passed directly.
+ assert(Hi == Integer);
+ return ABIArgInfo::getDirect(ResType);
+ }
break;
// AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next
@@ -2783,6 +2794,11 @@ X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned freeIntRegs,
return ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty));
}
+ if (ResType->isIntegerTy(128)) {
+ assert(Hi == Integer);
+ ++neededInt;
+ return ABIArgInfo::getDirect(ResType);
+ }
break;
// AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 456bfe885f35..8880c9375143 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3846,15 +3846,6 @@ static void RenderOpenACCOptions(const Driver &D, const ArgList &Args,
return;
CmdArgs.push_back("-fopenacc");
-
- if (Arg *A = Args.getLastArg(options::OPT_openacc_macro_override)) {
- StringRef Value = A->getValue();
- int Version;
- if (!Value.getAsInteger(10, Version))
- A->renderAsInput(Args, CmdArgs);
- else
- D.Diag(diag::err_drv_clang_unsupported) << Value;
- }
}
static void RenderBuiltinOptions(const ToolChain &TC, const llvm::Triple &T,
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 6ab36d867596..3a36250da57a 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -3913,12 +3913,8 @@ void CompilerInvocationBase::GenerateLangArgs(const LangOptions &Opts,
if (Opts.OpenMPCUDAMode)
GenerateArg(Consumer, OPT_fopenmp_cuda_mode);
- if (Opts.OpenACC) {
+ if (Opts.OpenACC)
GenerateArg(Consumer, OPT_fopenacc);
- if (!Opts.OpenACCMacroOverride.empty())
- GenerateArg(Consumer, OPT_openacc_macro_override,
- Opts.OpenACCMacroOverride);
- }
// The arguments used to set Optimize, OptimizeSize and NoInlineDefine are
// generated from CodeGenOptions.
@@ -4424,13 +4420,9 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
Args.hasArg(options::OPT_fopenmp_cuda_mode);
// OpenACC Configuration.
- if (Args.hasArg(options::OPT_fopenacc)) {
+ if (Args.hasArg(options::OPT_fopenacc))
Opts.OpenACC = true;
- if (Arg *A = Args.getLastArg(options::OPT_openacc_macro_override))
- Opts.OpenACCMacroOverride = A->getValue();
- }
-
if (Arg *A = Args.getLastArg(OPT_ffp_contract)) {
StringRef Val = A->getValue();
if (Val == "fast")
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index 38b2e0cf1ca5..382ccd610946 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -639,16 +639,8 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
}
}
- if (LangOpts.OpenACC) {
- // FIXME: When we have full support for OpenACC, we should set this to the
- // version we support. Until then, set as '1' by default, but provide a
- // temporary mechanism for users to override this so real-world examples can
- // be tested against.
- if (!LangOpts.OpenACCMacroOverride.empty())
- Builder.defineMacro("_OPENACC", LangOpts.OpenACCMacroOverride);
- else
- Builder.defineMacro("_OPENACC", "1");
- }
+ if (LangOpts.OpenACC)
+ Builder.defineMacro("_OPENACC", "202506");
}
/// Initialize the predefined C++ language feature test macros defined in
diff --git a/clang/test/CIR/CodeGen/complex-builtins.cpp b/clang/test/CIR/CodeGen/complex-builtins.cpp
index f0d12d0ef666..811af47a704f 100644
--- a/clang/test/CIR/CodeGen/complex-builtins.cpp
+++ b/clang/test/CIR/CodeGen/complex-builtins.cpp
@@ -83,3 +83,39 @@ void foo3() {
// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[COMPLEX]], i32 0, i32 1
// OGCG: %[[A_IMAG:.*]] = load double, ptr %[[A_IMAG_PTR]], align 8
// OGCG: store double %[[A_IMAG]], ptr %[[INIT]], align 8
+
+void foo4() {
+ float _Complex a;
+ float _Complex b = __builtin_conjf(a);
+}
+
+// CIR: %[[COMPLEX:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
+// CIR: %[[RESULT:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b", init]
+// CIR: %[[TMP:.*]] = cir.load{{.*}} %[[COMPLEX]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: %[[REAL:.*]] = cir.complex.real %[[TMP]] : !cir.complex<!cir.float> -> !cir.float
+// CIR: %[[IMAG:.*]] = cir.complex.imag %[[TMP]] : !cir.complex<!cir.float> -> !cir.float
+// CIR: %[[IMAG_MINUS:.*]] = cir.unary(minus, %[[IMAG]]) : !cir.float, !cir.float
+// CIR: %[[RESULT_VAL:.*]] = cir.complex.create %[[REAL]], %[[IMAG_MINUS]] : !cir.float -> !cir.complex<!cir.float>
+// CIR: cir.store{{.*}} %[[RESULT_VAL]], %[[RESULT]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+
+// LLVM: %[[COMPLEX:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[RESULT:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[TMP:.*]] = load { float, float }, ptr %[[COMPLEX]], align 4
+// LLVM: %[[REAL:.*]] = extractvalue { float, float } %[[TMP]], 0
+// LLVM: %[[IMAG:.*]] = extractvalue { float, float } %[[TMP]], 1
+// LLVM: %[[IMAG_MINUS:.*]] = fneg float %[[IMAG]]
+// LLVM: %[[RESULT_TMP:.*]] = insertvalue { float, float } {{.*}}, float %[[REAL]], 0
+// LLVM: %[[RESULT_VAL:.*]] = insertvalue { float, float } %[[RESULT_TMP]], float %[[IMAG_MINUS]], 1
+// LLVM: store { float, float } %[[RESULT_VAL]], ptr %[[RESULT]], align 4
+
+// OGCG: %[[COMPLEX:.*]] = alloca { float, float }, align 4
+// OGCG: %[[RESULT:.*]] = alloca { float, float }, align 4
+// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX]], i32 0, i32 0
+// OGCG: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX]], i32 0, i32 1
+// OGCG: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4
+// OGCG: %[[A_IMAG_MINUS:.*]] = fneg float %[[A_IMAG]]
+// OGCG: %[[RESULT_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[RESULT]], i32 0, i32 0
+// OGCG: %[[RESULT_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[RESULT]], i32 0, i32 1
+// OGCG: store float %[[A_REAL]], ptr %[[RESULT_REAL_PTR]], align 4
+// OGCG: store float %[[A_IMAG_MINUS]], ptr %[[RESULT_IMAG_PTR]], align 4
diff --git a/clang/test/CodeGen/X86/i128-debuginfo.c b/clang/test/CodeGen/X86/i128-debuginfo.c
new file mode 100644
index 000000000000..4b865c1bed9f
--- /dev/null
+++ b/clang/test/CodeGen/X86/i128-debuginfo.c
@@ -0,0 +1,10 @@
+// no autogeneration since update_cc_test_checks does not support -g
+// RUN: %clang_cc1 -triple x86_64-pc-linux -O1 -debug-info-kind=limited -emit-llvm -o - %s | FileCheck %s
+
+// CHECK-LABEL: define{{.*}} i128 @add(i128 noundef %a)
+// CHECK: #dbg_value(i128 %a, ![[DI:.*]], !DIExpression()
+__int128_t add(__int128_t a) {
+ return a + a;
+}
+
+// CHECK: ![[DI]] = !DILocalVariable(name: "a", arg: 1
diff --git a/clang/test/CodeGen/X86/x86_64-arguments.c b/clang/test/CodeGen/X86/x86_64-arguments.c
index 82845f0a2b31..580f9487395d 100644
--- a/clang/test/CodeGen/X86/x86_64-arguments.c
+++ b/clang/test/CodeGen/X86/x86_64-arguments.c
@@ -551,6 +551,45 @@ struct s68 {
void f68(struct s68 x) {
}
+// CHECK-LABEL: define{{.*}} i128 @f69(i128 noundef %a)
+__int128_t f69(__int128_t a) {
+ return a;
+}
+
+// CHECK-LABEL: define{{.*}} i128 @f70(i128 noundef %a)
+__uint128_t f70(__uint128_t a) {
+ return a;
+}
+
+// check that registers are correctly counted for (u)int128_t arguments
+struct s71 {
+ long long a, b;
+};
+// CHECK-LABEL: define{{.*}} void @f71(i128 noundef %a, i128 noundef %b, i64 noundef %c, ptr noundef byval(%struct.s71) align 8 %d)
+void f71(__int128_t a, __int128_t b, long long c, struct s71 d) {
+}
+// CHECK-LABEL: define{{.*}} void @f72(i128 noundef %a, i128 noundef %b, i64 %d.coerce0, i64 %d.coerce1)
+void f72(__int128_t a, __int128_t b, struct s71 d) {
+}
+
+// check that structs containing (u)int128_t are passed correctly
+struct s73 {
+ struct inner {
+ __uint128_t a;
+ };
+ struct inner in;
+};
+// CHECK-LABEL: define{{.*}} i128 @f73(i128 %a.coerce)
+struct s73 f73(struct s73 a) {
+ return a;
+}
+
+// check that _BitInt(128) is still passed correctly on the stack
+// CHECK-LABEL: define{{.*}} i128 @f74(i128 noundef %b, i128 noundef %c, i128 noundef %d, i64 noundef %e, ptr noundef byval(i128) align 8 %0)
+_BitInt(128) f74(__uint128_t b, __uint128_t c, __uint128_t d, long e, _BitInt(128) a) {
+ return a;
+}
+
/// The synthesized __va_list_tag does not have file/line fields.
// CHECK: = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "__va_list_tag",
// CHECK-NOT: file:
diff --git a/clang/test/CodeGen/alloc-align-attr.c b/clang/test/CodeGen/alloc-align-attr.c
index 76e5d1041b19..c4c4e76eaaa0 100644
--- a/clang/test/CodeGen/alloc-align-attr.c
+++ b/clang/test/CodeGen/alloc-align-attr.c
@@ -70,66 +70,42 @@ __INT32_TYPE__ test4(__SIZE_TYPE__ a) {
struct Empty {};
struct MultiArgs { __INT64_TYPE__ a, b;};
-// Struct parameter doesn't take up an IR parameter, 'i' takes up 2.
+// Struct parameter doesn't take up an IR parameter, 'i' takes up 1.
// Truncation to i64 is permissible, since alignments of greater than 2^64 are insane.
__INT32_TYPE__ *m3(struct Empty s, __int128_t i) __attribute__((alloc_align(2)));
// CHECK-LABEL: @test5(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[A:%.*]] = alloca i128, align 16
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i128, align 16
// CHECK-NEXT: [[E:%.*]] = alloca [[STRUCT_EMPTY:%.*]], align 1
-// CHECK-NEXT: [[COERCE:%.*]] = alloca i128, align 16
-// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[A]], i32 0, i32 0
-// CHECK-NEXT: store i64 [[A_COERCE0:%.*]], ptr [[TMP0]], align 16
-// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[A]], i32 0, i32 1
-// CHECK-NEXT: store i64 [[A_COERCE1:%.*]], ptr [[TMP1]], align 8
-// CHECK-NEXT: [[A1:%.*]] = load i128, ptr [[A]], align 16
-// CHECK-NEXT: store i128 [[A1]], ptr [[A_ADDR]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[A_ADDR]], align 16
-// CHECK-NEXT: store i128 [[TMP2]], ptr [[COERCE]], align 16
-// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[COERCE]], i32 0, i32 0
-// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[COERCE]], i32 0, i32 1
-// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
-// CHECK-NEXT: [[CALL:%.*]] = call ptr @m3(i64 noundef [[TMP4]], i64 noundef [[TMP6]])
-// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = trunc i128 [[TMP2]] to i64
+// CHECK-NEXT: store i128 [[A:%.*]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr [[A_ADDR]], align 16
+// CHECK-NEXT: [[CALL:%.*]] = call ptr @m3(i128 noundef [[TMP0]])
+// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = trunc i128 [[TMP0]] to i64
// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[CALL]], i64 [[CASTED_ALIGN]]) ]
-// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[CALL]], align 4
-// CHECK-NEXT: ret i32 [[TMP7]]
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
+// CHECK-NEXT: ret i32 [[TMP1]]
//
__INT32_TYPE__ test5(__int128_t a) {
struct Empty e;
return *m3(e, a);
}
-// Struct parameter takes up 2 parameters, 'i' takes up 2.
+// Struct parameter takes up 2 parameters, 'i' takes up 1.
__INT32_TYPE__ *m4(struct MultiArgs s, __int128_t i) __attribute__((alloc_align(2)));
// CHECK-LABEL: @test6(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[A:%.*]] = alloca i128, align 16
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i128, align 16
// CHECK-NEXT: [[E:%.*]] = alloca [[STRUCT_MULTIARGS:%.*]], align 8
-// CHECK-NEXT: [[COERCE:%.*]] = alloca i128, align 16
-// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[A]], i32 0, i32 0
-// CHECK-NEXT: store i64 [[A_COERCE0:%.*]], ptr [[TMP0]], align 16
-// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[A]], i32 0, i32 1
-// CHECK-NEXT: store i64 [[A_COERCE1:%.*]], ptr [[TMP1]], align 8
-// CHECK-NEXT: [[A1:%.*]] = load i128, ptr [[A]], align 16
-// CHECK-NEXT: store i128 [[A1]], ptr [[A_ADDR]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[A_ADDR]], align 16
-// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[E]], i32 0, i32 0
+// CHECK-NEXT: store i128 [[A:%.*]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr [[A_ADDR]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[E]], i32 0, i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[E]], i32 0, i32 1
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
-// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[E]], i32 0, i32 1
-// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
-// CHECK-NEXT: store i128 [[TMP2]], ptr [[COERCE]], align 16
-// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[COERCE]], i32 0, i32 0
-// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[COERCE]], i32 0, i32 1
-// CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8
-// CHECK-NEXT: [[CALL:%.*]] = call ptr @m4(i64 [[TMP4]], i64 [[TMP6]], i64 noundef [[TMP8]], i64 noundef [[TMP10]])
-// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = trunc i128 [[TMP2]] to i64
+// CHECK-NEXT: [[CALL:%.*]] = call ptr @m4(i64 [[TMP2]], i64 [[TMP4]], i128 noundef [[TMP0]])
+// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = trunc i128 [[TMP0]] to i64
// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[CALL]], i64 [[CASTED_ALIGN]]) ]
-// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[CALL]], align 4
-// CHECK-NEXT: ret i32 [[TMP11]]
+// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[CALL]], align 4
+// CHECK-NEXT: ret i32 [[TMP5]]
//
__INT32_TYPE__ test6(__int128_t a) {
struct MultiArgs e;
diff --git a/clang/test/CodeGen/builtins.c b/clang/test/CodeGen/builtins.c
index eda6c67fdad0..aa9965b81598 100644
--- a/clang/test/CodeGen/builtins.c
+++ b/clang/test/CodeGen/builtins.c
@@ -956,36 +956,24 @@ void test_builtin_os_log_errno(void) {
void test_builtin_os_log_long_double(void *buf, long double ld) {
// CHECK: %[[BUF_ADDR:.*]] = alloca ptr, align 8
// CHECK: %[[LD_ADDR:.*]] = alloca x86_fp80, align 16
- // CHECK: %[[COERCE:.*]] = alloca i128, align 16
// CHECK: store ptr %[[BUF]], ptr %[[BUF_ADDR]], align 8
// CHECK: store x86_fp80 %[[LD]], ptr %[[LD_ADDR]], align 16
// CHECK: %[[V0:.*]] = load ptr, ptr %[[BUF_ADDR]], align 8
// CHECK: %[[V1:.*]] = load x86_fp80, ptr %[[LD_ADDR]], align 16
// CHECK: %[[V2:.*]] = bitcast x86_fp80 %[[V1]] to i80
// CHECK: %[[V3:.*]] = zext i80 %[[V2]] to i128
- // CHECK: store i128 %[[V3]], ptr %[[COERCE]], align 16
- // CHECK: %[[V5:.*]] = getelementptr inbounds nuw { i64, i64 }, ptr %[[COERCE]], i32 0, i32 0
- // CHECK: %[[V6:.*]] = load i64, ptr %[[V5]], align 16
- // CHECK: %[[V7:.*]] = getelementptr inbounds nuw { i64, i64 }, ptr %[[COERCE]], i32 0, i32 1
- // CHECK: %[[V8:.*]] = load i64, ptr %[[V7]], align 8
- // CHECK: call void @__os_log_helper_1_0_1_16_0(ptr noundef %[[V0]], i64 noundef %[[V6]], i64 noundef %[[V8]])
+ // CHECK: call void @__os_log_helper_1_0_1_16_0(ptr noundef %[[V0]], i128 noundef %[[V3]])
__builtin_os_log_format(buf, "%Lf", ld);
}
// CHECK-LABEL: define linkonce_odr hidden void @__os_log_helper_1_0_1_16_0
-// CHECK: (ptr noundef %[[BUFFER:.*]], i64 noundef %[[ARG0_COERCE0:.*]], i64 noundef %[[ARG0_COERCE1:.*]])
+// CHECK: (ptr noundef %[[BUFFER:.*]], i128 noundef %[[ARG0:.*]])
-// CHECK: %[[ARG0:.*]] = alloca i128, align 16
// CHECK: %[[BUFFER_ADDR:.*]] = alloca ptr, align 8
// CHECK: %[[ARG0_ADDR:.*]] = alloca i128, align 16
-// CHECK: %[[V1:.*]] = getelementptr inbounds nuw { i64, i64 }, ptr %[[ARG0]], i32 0, i32 0
-// CHECK: store i64 %[[ARG0_COERCE0]], ptr %[[V1]], align 16
-// CHECK: %[[V2:.*]] = getelementptr inbounds nuw { i64, i64 }, ptr %[[ARG0]], i32 0, i32 1
-// CHECK: store i64 %[[ARG0_COERCE1]], ptr %[[V2]], align 8
-// CHECK: %[[ARG01:.*]] = load i128, ptr %[[ARG0]], align 16
// CHECK: store ptr %[[BUFFER]], ptr %[[BUFFER_ADDR]], align 8
-// CHECK: store i128 %[[ARG01]], ptr %[[ARG0_ADDR]], align 16
+// CHECK: store i128 %[[ARG0]], ptr %[[ARG0_ADDR]], align 16
// CHECK: %[[BUF:.*]] = load ptr, ptr %[[BUFFER_ADDR]], align 8
// CHECK: %[[SUMMARY:.*]] = getelementptr i8, ptr %[[BUF]], i64 0
// CHECK: store i8 0, ptr %[[SUMMARY]], align 1
diff --git a/clang/test/CodeGen/ext-int-cc.c b/clang/test/CodeGen/ext-int-cc.c
index f31a4eb240c2..fdca4012ee4a 100644
--- a/clang/test/CodeGen/ext-int-cc.c
+++ b/clang/test/CodeGen/ext-int-cc.c
@@ -32,7 +32,7 @@
// Make sure 128 and 64 bit versions are passed like integers.
void ParamPassing(_BitInt(128) b, _BitInt(64) c) {}
-// LIN64: define{{.*}} void @ParamPassing(i64 %{{.+}}, i64 %{{.+}}, i64 %{{.+}})
+// LIN64: define{{.*}} void @ParamPassing(i128 %{{.+}}, i64 %{{.+}})
// WIN64: define dso_local void @ParamPassing(ptr %{{.+}}, i64 %{{.+}})
// LIN32: define{{.*}} void @ParamPassing(ptr %{{.+}}, i64 %{{.+}})
// WIN32: define dso_local void @ParamPassing(ptr %{{.+}}, i64 %{{.+}})
@@ -251,7 +251,7 @@ _BitInt(127) ReturnPassing3(void) { return 0; }
// LA32: define{{.*}} void @ReturnPassing3(ptr dead_on_unwind noalias writable sret
_BitInt(128) ReturnPassing4(void) { return 0; }
-// LIN64: define{{.*}} { i64, i64 } @ReturnPassing4(
+// LIN64: define{{.*}} i128 @ReturnPassing4(
// WIN64: define dso_local void @ReturnPassing4(ptr dead_on_unwind noalias writable sret
// LIN32: define{{.*}} void @ReturnPassing4(ptr dead_on_unwind noalias writable sret
// WIN32: define dso_local void @ReturnPassing4(ptr dead_on_unwind noalias writable sret
diff --git a/clang/test/CodeGen/extend-arg-64.c b/clang/test/CodeGen/extend-arg-64.c
index 2cb56d35af21..8b99c01807ec 100644
--- a/clang/test/CodeGen/extend-arg-64.c
+++ b/clang/test/CodeGen/extend-arg-64.c
@@ -84,7 +84,7 @@ int test(void) {
#ifdef D128
knr(i128);
// CHECKEXT: load i128
- // CHECKEXT: call{{.*}} void (i64, i64, ...) @knr
+ // CHECKEXT: call{{.*}} void (i128, ...) @knr
#endif
knr(u32, s32, u16, s16, u8, s8);
diff --git a/clang/test/Driver/openacc.c b/clang/test/Driver/openacc.c
index c7f1d2545bd0..f46e2a32bcab 100644
--- a/clang/test/Driver/openacc.c
+++ b/clang/test/Driver/openacc.c
@@ -1,14 +1,2 @@
// RUN: %clang -S -### -fopenacc %s 2>&1 | FileCheck %s --check-prefix=CHECK-DRIVER
// CHECK-DRIVER: "-cc1" {{.*}} "-fopenacc"
-
-// RUN: %clang -S -### -fopenacc -fexperimental-openacc-macro-override=202211 %s 2>&1 | FileCheck %s --check-prefix=CHECK-MACRO-OVERRIDE
-// RUN: %clang -S -### -fopenacc -fexperimental-openacc-macro-override 202211 %s 2>&1 | FileCheck %s --check-prefix=CHECK-MACRO-OVERRIDE
-// CHECK-MACRO-OVERRIDE: "-cc1"{{.*}} "-fexperimental-openacc-macro-override" "202211"
-
-// RUN: not %clang -S -fopenacc -fexperimental-openacc-macro-override=202211L %s 2>&1 | FileCheck %s --check-prefix=INVALID
-// RUN: not %clang -S -fopenacc -fexperimental-openacc-macro-override 202211L %s 2>&1 | FileCheck %s --check-prefix=INVALID
-// RUN: not %clang -S -fopenacc -fexperimental-openacc-macro-override=L202211 %s 2>&1 | FileCheck %s --check-prefix=INVALID
-// RUN: not %clang -S -fopenacc -fexperimental-openacc-macro-override L202211 %s 2>&1 | FileCheck %s --check-prefix=INVALID
-// RUN: not %clang -S -fopenacc -fexperimental-openacc-macro-override=2022L11 %s 2>&1 | FileCheck %s --check-prefix=INVALID
-// RUN: not %clang -S -fopenacc -fexperimental-openacc-macro-override 2022L11 %s 2>&1 | FileCheck %s --check-prefix=INVALID
-// INVALID: error: the clang compiler does not support
diff --git a/clang/test/Preprocessor/openacc.c b/clang/test/Preprocessor/openacc.c
index be7052f00e0c..283baa6c2fe4 100644
--- a/clang/test/Preprocessor/openacc.c
+++ b/clang/test/Preprocessor/openacc.c
@@ -1,13 +1,9 @@
// RUN: %clang_cc1 -E -fopenacc %s | FileCheck %s --check-prefix=DEFAULT
-// RUN: %clang_cc1 -E -fopenacc -fexperimental-openacc-macro-override 202211 %s | FileCheck %s --check-prefix=OVERRIDE
-// DEFAULT: OpenACC:1:
-// OVERRIDE: OpenACC:202211:
+// DEFAULT: OpenACC:202506:
OpenACC:_OPENACC:
// RUN: %clang_cc1 -E -dM -fopenacc %s | FileCheck %s --check-prefix=MACRO_PRINT_DEF
-// RUN: %clang_cc1 -E -dM -fopenacc -fexperimental-openacc-macro-override 202211 %s | FileCheck %s --check-prefix=MACRO_PRINT_OVR
-// MACRO_PRINT_DEF: #define _OPENACC 1
-// MACRO_PRINT_OVR: #define _OPENACC 202211
+// MACRO_PRINT_DEF: #define _OPENACC 202506
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 39e4444cde4e..51eb33dec186 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -708,6 +708,7 @@ genDataOperandOperations(const Fortran::parser::AccObjectList &objectList,
bool setDeclareAttr = false) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
Fortran::evaluate::ExpressionAnalyzer ea{semanticsContext};
+ const bool unwrapBoxAddr = true;
for (const auto &accObject : objectList.v) {
llvm::SmallVector<mlir::Value> bounds;
std::stringstream asFortran;
@@ -735,8 +736,25 @@ genDataOperandOperations(const Fortran::parser::AccObjectList &objectList,
Op op = createDataEntryOp<Op>(
builder, operandLocation, baseAddr, asFortran, bounds, structured,
implicit, dataClause, baseAddr.getType(), async, asyncDeviceTypes,
- asyncOnlyDeviceTypes, /*unwrapBoxAddr=*/true, info.isPresent);
+ asyncOnlyDeviceTypes, unwrapBoxAddr, info.isPresent);
dataOperands.push_back(op.getAccVar());
+
+ // For UseDeviceOp, if operand is one of a pair resulting from a
+ // declare operation, create a UseDeviceOp for the other operand as well.
+ if constexpr (std::is_same_v<Op, mlir::acc::UseDeviceOp>) {
+ if (auto declareOp =
+ mlir::dyn_cast<hlfir::DeclareOp>(baseAddr.getDefiningOp())) {
+ mlir::Value otherAddr = declareOp.getResult(1);
+ if (baseAddr != otherAddr) {
+ Op op = createDataEntryOp<Op>(builder, operandLocation, otherAddr,
+ asFortran, bounds, structured, implicit,
+ dataClause, otherAddr.getType(), async,
+ asyncDeviceTypes, asyncOnlyDeviceTypes,
+ unwrapBoxAddr, info.isPresent);
+ dataOperands.push_back(op.getAccVar());
+ }
+ }
+ }
}
}
@@ -4396,10 +4414,34 @@ getAttributeValueByDeviceType(llvm::SmallVector<mlir::Attribute> &attributes,
return std::nullopt;
}
+// Helper function to extract string value from bind name variant
+static std::optional<llvm::StringRef> getBindNameStringValue(
+ const std::optional<std::variant<mlir::SymbolRefAttr, mlir::StringAttr>>
+ &bindNameValue) {
+ if (!bindNameValue.has_value())
+ return std::nullopt;
+
+ return std::visit(
+ [](const auto &attr) -> std::optional<llvm::StringRef> {
+ if constexpr (std::is_same_v<std::decay_t<decltype(attr)>,
+ mlir::StringAttr>) {
+ return attr.getValue();
+ } else if constexpr (std::is_same_v<std::decay_t<decltype(attr)>,
+ mlir::SymbolRefAttr>) {
+ return attr.getLeafReference();
+ } else {
+ return std::nullopt;
+ }
+ },
+ bindNameValue.value());
+}
+
static bool compareDeviceTypeInfo(
mlir::acc::RoutineOp op,
- llvm::SmallVector<mlir::Attribute> &bindNameArrayAttr,
- llvm::SmallVector<mlir::Attribute> &bindNameDeviceTypeArrayAttr,
+ llvm::SmallVector<mlir::Attribute> &bindIdNameArrayAttr,
+ llvm::SmallVector<mlir::Attribute> &bindStrNameArrayAttr,
+ llvm::SmallVector<mlir::Attribute> &bindIdNameDeviceTypeArrayAttr,
+ llvm::SmallVector<mlir::Attribute> &bindStrNameDeviceTypeArrayAttr,
llvm::SmallVector<mlir::Attribute> &gangArrayAttr,
llvm::SmallVector<mlir::Attribute> &gangDimArrayAttr,
llvm::SmallVector<mlir::Attribute> &gangDimDeviceTypeArrayAttr,
@@ -4409,9 +4451,13 @@ static bool compareDeviceTypeInfo(
for (uint32_t dtypeInt = 0;
dtypeInt != mlir::acc::getMaxEnumValForDeviceType(); ++dtypeInt) {
auto dtype = static_cast<mlir::acc::DeviceType>(dtypeInt);
- if (op.getBindNameValue(dtype) !=
- getAttributeValueByDeviceType<llvm::StringRef, mlir::StringAttr>(
- bindNameArrayAttr, bindNameDeviceTypeArrayAttr, dtype))
+ auto bindNameValue = getBindNameStringValue(op.getBindNameValue(dtype));
+ if (bindNameValue !=
+ getAttributeValueByDeviceType<llvm::StringRef, mlir::StringAttr>(
+ bindIdNameArrayAttr, bindIdNameDeviceTypeArrayAttr, dtype) &&
+ bindNameValue !=
+ getAttributeValueByDeviceType<llvm::StringRef, mlir::StringAttr>(
+ bindStrNameArrayAttr, bindStrNameDeviceTypeArrayAttr, dtype))
return false;
if (op.hasGang(dtype) != hasDeviceType(gangArrayAttr, dtype))
return false;
@@ -4458,8 +4504,10 @@ getArrayAttrOrNull(fir::FirOpBuilder &builder,
void createOpenACCRoutineConstruct(
Fortran::lower::AbstractConverter &converter, mlir::Location loc,
mlir::ModuleOp mod, mlir::func::FuncOp funcOp, std::string funcName,
- bool hasNohost, llvm::SmallVector<mlir::Attribute> &bindNames,
- llvm::SmallVector<mlir::Attribute> &bindNameDeviceTypes,
+ bool hasNohost, llvm::SmallVector<mlir::Attribute> &bindIdNames,
+ llvm::SmallVector<mlir::Attribute> &bindStrNames,
+ llvm::SmallVector<mlir::Attribute> &bindIdNameDeviceTypes,
+ llvm::SmallVector<mlir::Attribute> &bindStrNameDeviceTypes,
llvm::SmallVector<mlir::Attribute> &gangDeviceTypes,
llvm::SmallVector<mlir::Attribute> &gangDimValues,
llvm::SmallVector<mlir::Attribute> &gangDimDeviceTypes,
@@ -4472,7 +4520,8 @@ void createOpenACCRoutineConstruct(
0) {
// If the routine is already specified with the same clauses, just skip
// the operation creation.
- if (compareDeviceTypeInfo(routineOp, bindNames, bindNameDeviceTypes,
+ if (compareDeviceTypeInfo(routineOp, bindIdNames, bindStrNames,
+ bindIdNameDeviceTypes, bindStrNameDeviceTypes,
gangDeviceTypes, gangDimValues,
gangDimDeviceTypes, seqDeviceTypes,
workerDeviceTypes, vectorDeviceTypes) &&
@@ -4489,8 +4538,10 @@ void createOpenACCRoutineConstruct(
modBuilder.create<mlir::acc::RoutineOp>(
loc, routineOpStr,
mlir::SymbolRefAttr::get(builder.getContext(), funcName),
- getArrayAttrOrNull(builder, bindNames),
- getArrayAttrOrNull(builder, bindNameDeviceTypes),
+ getArrayAttrOrNull(builder, bindIdNames),
+ getArrayAttrOrNull(builder, bindStrNames),
+ getArrayAttrOrNull(builder, bindIdNameDeviceTypes),
+ getArrayAttrOrNull(builder, bindStrNameDeviceTypes),
getArrayAttrOrNull(builder, workerDeviceTypes),
getArrayAttrOrNull(builder, vectorDeviceTypes),
getArrayAttrOrNull(builder, seqDeviceTypes), hasNohost,
@@ -4507,8 +4558,10 @@ static void interpretRoutineDeviceInfo(
llvm::SmallVector<mlir::Attribute> &seqDeviceTypes,
llvm::SmallVector<mlir::Attribute> &vectorDeviceTypes,
llvm::SmallVector<mlir::Attribute> &workerDeviceTypes,
- llvm::SmallVector<mlir::Attribute> &bindNameDeviceTypes,
- llvm::SmallVector<mlir::Attribute> &bindNames,
+ llvm::SmallVector<mlir::Attribute> &bindIdNameDeviceTypes,
+ llvm::SmallVector<mlir::Attribute> &bindStrNameDeviceTypes,
+ llvm::SmallVector<mlir::Attribute> &bindIdNames,
+ llvm::SmallVector<mlir::Attribute> &bindStrNames,
llvm::SmallVector<mlir::Attribute> &gangDeviceTypes,
llvm::SmallVector<mlir::Attribute> &gangDimValues,
llvm::SmallVector<mlir::Attribute> &gangDimDeviceTypes) {
@@ -4541,16 +4594,18 @@ static void interpretRoutineDeviceInfo(
if (dinfo.bindNameOpt().has_value()) {
const auto &bindName = dinfo.bindNameOpt().value();
mlir::Attribute bindNameAttr;
- if (const auto &bindStr{std::get_if<std::string>(&bindName)}) {
+ if (const auto &bindSym{
+ std::get_if<Fortran::semantics::SymbolRef>(&bindName)}) {
+ bindNameAttr = builder.getSymbolRefAttr(converter.mangleName(*bindSym));
+ bindIdNames.push_back(bindNameAttr);
+ bindIdNameDeviceTypes.push_back(getDeviceTypeAttr());
+ } else if (const auto &bindStr{std::get_if<std::string>(&bindName)}) {
bindNameAttr = builder.getStringAttr(*bindStr);
- } else if (const auto &bindSym{
- std::get_if<Fortran::semantics::SymbolRef>(&bindName)}) {
- bindNameAttr = builder.getStringAttr(converter.mangleName(*bindSym));
+ bindStrNames.push_back(bindNameAttr);
+ bindStrNameDeviceTypes.push_back(getDeviceTypeAttr());
} else {
llvm_unreachable("Unsupported bind name type");
}
- bindNames.push_back(bindNameAttr);
- bindNameDeviceTypes.push_back(getDeviceTypeAttr());
}
}
@@ -4566,8 +4621,9 @@ void Fortran::lower::genOpenACCRoutineConstruct(
bool hasNohost{false};
llvm::SmallVector<mlir::Attribute> seqDeviceTypes, vectorDeviceTypes,
- workerDeviceTypes, bindNameDeviceTypes, bindNames, gangDeviceTypes,
- gangDimDeviceTypes, gangDimValues;
+ workerDeviceTypes, bindIdNameDeviceTypes, bindStrNameDeviceTypes,
+ bindIdNames, bindStrNames, gangDeviceTypes, gangDimDeviceTypes,
+ gangDimValues;
for (const Fortran::semantics::OpenACCRoutineInfo &info : routineInfos) {
// Device Independent Attributes
@@ -4576,24 +4632,26 @@ void Fortran::lower::genOpenACCRoutineConstruct(
}
// Note: Device Independent Attributes are set to the
// none device type in `info`.
- interpretRoutineDeviceInfo(converter, info, seqDeviceTypes,
- vectorDeviceTypes, workerDeviceTypes,
- bindNameDeviceTypes, bindNames, gangDeviceTypes,
- gangDimValues, gangDimDeviceTypes);
+ interpretRoutineDeviceInfo(
+ converter, info, seqDeviceTypes, vectorDeviceTypes, workerDeviceTypes,
+ bindIdNameDeviceTypes, bindStrNameDeviceTypes, bindIdNames,
+ bindStrNames, gangDeviceTypes, gangDimValues, gangDimDeviceTypes);
// Device Dependent Attributes
for (const Fortran::semantics::OpenACCRoutineDeviceTypeInfo &dinfo :
info.deviceTypeInfos()) {
- interpretRoutineDeviceInfo(
- converter, dinfo, seqDeviceTypes, vectorDeviceTypes,
- workerDeviceTypes, bindNameDeviceTypes, bindNames, gangDeviceTypes,
- gangDimValues, gangDimDeviceTypes);
+ interpretRoutineDeviceInfo(converter, dinfo, seqDeviceTypes,
+ vectorDeviceTypes, workerDeviceTypes,
+ bindIdNameDeviceTypes, bindStrNameDeviceTypes,
+ bindIdNames, bindStrNames, gangDeviceTypes,
+ gangDimValues, gangDimDeviceTypes);
}
}
createOpenACCRoutineConstruct(
- converter, loc, mod, funcOp, funcName, hasNohost, bindNames,
- bindNameDeviceTypes, gangDeviceTypes, gangDimValues, gangDimDeviceTypes,
- seqDeviceTypes, workerDeviceTypes, vectorDeviceTypes);
+ converter, loc, mod, funcOp, funcName, hasNohost, bindIdNames,
+ bindStrNames, bindIdNameDeviceTypes, bindStrNameDeviceTypes,
+ gangDeviceTypes, gangDimValues, gangDimDeviceTypes, seqDeviceTypes,
+ workerDeviceTypes, vectorDeviceTypes);
}
static void
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 4458f62eea95..fcb20fdf187f 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -372,90 +372,6 @@ extractMappedBaseValues(llvm::ArrayRef<mlir::Value> vars,
});
}
-/// Get the directive enumeration value corresponding to the given OpenMP
-/// construct PFT node.
-llvm::omp::Directive
-extractOmpDirective(const parser::OpenMPConstruct &ompConstruct) {
- return common::visit(
- common::visitors{
- [](const parser::OpenMPAllocatorsConstruct &c) {
- return llvm::omp::OMPD_allocators;
- },
- [](const parser::OpenMPAssumeConstruct &c) {
- return llvm::omp::OMPD_assume;
- },
- [](const parser::OpenMPAtomicConstruct &c) {
- return llvm::omp::OMPD_atomic;
- },
- [](const parser::OpenMPBlockConstruct &c) {
- return std::get<parser::OmpBlockDirective>(
- std::get<parser::OmpBeginBlockDirective>(c.t).t)
- .v;
- },
- [](const parser::OpenMPCriticalConstruct &c) {
- return llvm::omp::OMPD_critical;
- },
- [](const parser::OpenMPDeclarativeAllocate &c) {
- return llvm::omp::OMPD_allocate;
- },
- [](const parser::OpenMPDispatchConstruct &c) {
- return llvm::omp::OMPD_dispatch;
- },
- [](const parser::OpenMPExecutableAllocate &c) {
- return llvm::omp::OMPD_allocate;
- },
- [](const parser::OpenMPLoopConstruct &c) {
- return std::get<parser::OmpLoopDirective>(
- std::get<parser::OmpBeginLoopDirective>(c.t).t)
- .v;
- },
- [](const parser::OpenMPSectionConstruct &c) {
- return llvm::omp::OMPD_section;
- },
- [](const parser::OpenMPSectionsConstruct &c) {
- return std::get<parser::OmpSectionsDirective>(
- std::get<parser::OmpBeginSectionsDirective>(c.t).t)
- .v;
- },
- [](const parser::OpenMPStandaloneConstruct &c) {
- return common::visit(
- common::visitors{
- [](const parser::OpenMPSimpleStandaloneConstruct &c) {
- return c.v.DirId();
- },
- [](const parser::OpenMPFlushConstruct &c) {
- return llvm::omp::OMPD_flush;
- },
- [](const parser::OpenMPCancelConstruct &c) {
- return llvm::omp::OMPD_cancel;
- },
- [](const parser::OpenMPCancellationPointConstruct &c) {
- return llvm::omp::OMPD_cancellation_point;
- },
- [](const parser::OmpMetadirectiveDirective &c) {
- return llvm::omp::OMPD_metadirective;
- },
- [](const parser::OpenMPDepobjConstruct &c) {
- return llvm::omp::OMPD_depobj;
- },
- [](const parser::OpenMPInteropConstruct &c) {
- return llvm::omp::OMPD_interop;
- }},
- c.u);
- },
- [](const parser::OpenMPUtilityConstruct &c) {
- return common::visit(
- common::visitors{[](const parser::OmpErrorDirective &c) {
- return llvm::omp::OMPD_error;
- },
- [](const parser::OmpNothingDirective &c) {
- return llvm::omp::OMPD_nothing;
- }},
- c.u);
- }},
- ompConstruct.u);
-}
-
/// Populate the global \see hostEvalInfo after processing clauses for the given
/// \p eval OpenMP target construct, or nested constructs, if these must be
/// evaluated outside of the target region per the spec.
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 2e53f01f1da6..b194150c0f7f 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -661,6 +661,90 @@ bool collectLoopRelatedInfo(
return found;
}
+
+/// Get the directive enumeration value corresponding to the given OpenMP
+/// construct PFT node.
+llvm::omp::Directive
+extractOmpDirective(const parser::OpenMPConstruct &ompConstruct) {
+ return common::visit(
+ common::visitors{
+ [](const parser::OpenMPAllocatorsConstruct &c) {
+ return llvm::omp::OMPD_allocators;
+ },
+ [](const parser::OpenMPAssumeConstruct &c) {
+ return llvm::omp::OMPD_assume;
+ },
+ [](const parser::OpenMPAtomicConstruct &c) {
+ return llvm::omp::OMPD_atomic;
+ },
+ [](const parser::OpenMPBlockConstruct &c) {
+ return std::get<parser::OmpBlockDirective>(
+ std::get<parser::OmpBeginBlockDirective>(c.t).t)
+ .v;
+ },
+ [](const parser::OpenMPCriticalConstruct &c) {
+ return llvm::omp::OMPD_critical;
+ },
+ [](const parser::OpenMPDeclarativeAllocate &c) {
+ return llvm::omp::OMPD_allocate;
+ },
+ [](const parser::OpenMPDispatchConstruct &c) {
+ return llvm::omp::OMPD_dispatch;
+ },
+ [](const parser::OpenMPExecutableAllocate &c) {
+ return llvm::omp::OMPD_allocate;
+ },
+ [](const parser::OpenMPLoopConstruct &c) {
+ return std::get<parser::OmpLoopDirective>(
+ std::get<parser::OmpBeginLoopDirective>(c.t).t)
+ .v;
+ },
+ [](const parser::OpenMPSectionConstruct &c) {
+ return llvm::omp::OMPD_section;
+ },
+ [](const parser::OpenMPSectionsConstruct &c) {
+ return std::get<parser::OmpSectionsDirective>(
+ std::get<parser::OmpBeginSectionsDirective>(c.t).t)
+ .v;
+ },
+ [](const parser::OpenMPStandaloneConstruct &c) {
+ return common::visit(
+ common::visitors{
+ [](const parser::OpenMPSimpleStandaloneConstruct &c) {
+ return c.v.DirId();
+ },
+ [](const parser::OpenMPFlushConstruct &c) {
+ return llvm::omp::OMPD_flush;
+ },
+ [](const parser::OpenMPCancelConstruct &c) {
+ return llvm::omp::OMPD_cancel;
+ },
+ [](const parser::OpenMPCancellationPointConstruct &c) {
+ return llvm::omp::OMPD_cancellation_point;
+ },
+ [](const parser::OmpMetadirectiveDirective &c) {
+ return llvm::omp::OMPD_metadirective;
+ },
+ [](const parser::OpenMPDepobjConstruct &c) {
+ return llvm::omp::OMPD_depobj;
+ },
+ [](const parser::OpenMPInteropConstruct &c) {
+ return llvm::omp::OMPD_interop;
+ }},
+ c.u);
+ },
+ [](const parser::OpenMPUtilityConstruct &c) {
+ return common::visit(
+ common::visitors{[](const parser::OmpErrorDirective &c) {
+ return llvm::omp::OMPD_error;
+ },
+ [](const parser::OmpNothingDirective &c) {
+ return llvm::omp::OMPD_nothing;
+ }},
+ c.u);
+ }},
+ ompConstruct.u);
+}
} // namespace omp
} // namespace lower
} // namespace Fortran
diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h
index 1526bd4e9023..8e3ad5c3452e 100644
--- a/flang/lib/Lower/OpenMP/Utils.h
+++ b/flang/lib/Lower/OpenMP/Utils.h
@@ -166,6 +166,9 @@ bool collectLoopRelatedInfo(
lower::pft::Evaluation &eval, const omp::List<omp::Clause> &clauses,
mlir::omp::LoopRelatedClauseOps &result,
llvm::SmallVectorImpl<const semantics::Symbol *> &iv);
+
+llvm::omp::Directive
+extractOmpDirective(const parser::OpenMPConstruct &ompConstruct);
} // namespace omp
} // namespace lower
} // namespace Fortran
diff --git a/flang/test/Lower/OpenACC/acc-host-data-unwrap-defaultbounds.f90 b/flang/test/Lower/OpenACC/acc-host-data-unwrap-defaultbounds.f90
index 164eb32a8f68..2de7cc5761a2 100644
--- a/flang/test/Lower/OpenACC/acc-host-data-unwrap-defaultbounds.f90
+++ b/flang/test/Lower/OpenACC/acc-host-data-unwrap-defaultbounds.f90
@@ -15,15 +15,17 @@ subroutine acc_host_data()
!$acc end host_data
! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%{{.*}} : index) upperbound(%{{.*}} : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index)
-! CHECK: %[[DA:.*]] = acc.use_device varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
-! CHECK: acc.host_data dataOperands(%[[DA]] : !fir.ref<!fir.array<10xf32>>)
+! CHECK: %[[DA0:.*]] = acc.use_device varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
+! CHECK: %[[DA1:.*]] = acc.use_device varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
+! CHECK: acc.host_data dataOperands(%[[DA0]], %[[DA1]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
!$acc host_data use_device(a) if_present
!$acc end host_data
! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%{{.*}} : index) upperbound(%{{.*}} : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index)
-! CHECK: %[[DA:.*]] = acc.use_device varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
-! CHECK: acc.host_data dataOperands(%[[DA]] : !fir.ref<!fir.array<10xf32>>) {
+! CHECK: %[[DA0:.*]] = acc.use_device varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
+! CHECK: %[[DA1:.*]] = acc.use_device varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
+! CHECK: acc.host_data dataOperands(%[[DA0]], %[[DA1]] : !fir.ref<!fir.array<10xf32>>{{.*}}) {
! CHECK: } attributes {ifPresent}
!$acc host_data use_device(a) if(ifCondition)
@@ -33,14 +35,14 @@ subroutine acc_host_data()
! CHECK: %[[DA:.*]] = acc.use_device varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
! CHECK: %[[LOAD_IFCOND:.*]] = fir.load %[[DECLIFCOND]]#0 : !fir.ref<!fir.logical<4>>
! CHECK: %[[IFCOND_I1:.*]] = fir.convert %[[LOAD_IFCOND]] : (!fir.logical<4>) -> i1
-! CHECK: acc.host_data if(%[[IFCOND_I1]]) dataOperands(%[[DA]] : !fir.ref<!fir.array<10xf32>>)
+! CHECK: acc.host_data if(%[[IFCOND_I1]]) dataOperands(%[[DA]]{{.*}} : !fir.ref<!fir.array<10xf32>>{{.*}})
!$acc host_data use_device(a) if(.true.)
!$acc end host_data
! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%{{.*}} : index) upperbound(%{{.*}} : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index)
! CHECK: %[[DA:.*]] = acc.use_device varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
-! CHECK: acc.host_data dataOperands(%[[DA]] : !fir.ref<!fir.array<10xf32>>)
+! CHECK: acc.host_data dataOperands(%[[DA]]{{.*}} : !fir.ref<!fir.array<10xf32>>{{.*}})
!$acc host_data use_device(a) if(.false.)
a = 1.0
diff --git a/flang/test/Lower/OpenACC/acc-host-data.f90 b/flang/test/Lower/OpenACC/acc-host-data.f90
index 871eabd256ca..4d09b25b983b 100644
--- a/flang/test/Lower/OpenACC/acc-host-data.f90
+++ b/flang/test/Lower/OpenACC/acc-host-data.f90
@@ -14,34 +14,37 @@ subroutine acc_host_data()
!$acc host_data use_device(a)
!$acc end host_data
-! CHECK: %[[DA:.*]] = acc.use_device varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
-! CHECK: acc.host_data dataOperands(%[[DA]] : !fir.ref<!fir.array<10xf32>>)
+! CHECK: %[[DA0:.*]] = acc.use_device varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
+! CHECK: %[[DA1:.*]] = acc.use_device varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
+! CHECK: acc.host_data dataOperands(%[[DA0]], %[[DA1]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
!$acc host_data use_device(a) if_present
!$acc end host_data
-! CHECK: %[[DA:.*]] = acc.use_device varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
-! CHECK: acc.host_data dataOperands(%[[DA]] : !fir.ref<!fir.array<10xf32>>) {
+! CHECK: %[[DA0:.*]] = acc.use_device varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
+! CHECK: %[[DA1:.*]] = acc.use_device varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
+! CHECK: acc.host_data dataOperands(%[[DA0]], %[[DA1]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
! CHECK: } attributes {ifPresent}
- !$acc host_data use_device(a) if_present if_present
+ !$acc host_data use_device(a) if_present
!$acc end host_data
-! CHECK: acc.host_data dataOperands(%{{.*}} : !fir.ref<!fir.array<10xf32>>) {
+! CHECK: acc.host_data dataOperands(%{{.*}}{{.*}} : !fir.ref<!fir.array<10xf32>>{{.*}}) {
! CHECK: } attributes {ifPresent}
!$acc host_data use_device(a) if(ifCondition)
!$acc end host_data
-! CHECK: %[[DA:.*]] = acc.use_device varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
+! CHECK: %[[DA0:.*]] = acc.use_device varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
+! CHECK: %[[DA1:.*]] = acc.use_device varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
! CHECK: %[[LOAD_IFCOND:.*]] = fir.load %[[DECLIFCOND]]#0 : !fir.ref<!fir.logical<4>>
! CHECK: %[[IFCOND_I1:.*]] = fir.convert %[[LOAD_IFCOND]] : (!fir.logical<4>) -> i1
-! CHECK: acc.host_data if(%[[IFCOND_I1]]) dataOperands(%[[DA]] : !fir.ref<!fir.array<10xf32>>)
+! CHECK: acc.host_data if(%[[IFCOND_I1]]) dataOperands(%[[DA0]]{{.*}} : !fir.ref<!fir.array<10xf32>>{{.*}})
!$acc host_data use_device(a) if(.true.)
!$acc end host_data
! CHECK: %[[DA:.*]] = acc.use_device varPtr(%[[DECLA]]#0 : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
-! CHECK: acc.host_data dataOperands(%[[DA]] : !fir.ref<!fir.array<10xf32>>)
+! CHECK: acc.host_data dataOperands(%[[DA]]{{.*}} : !fir.ref<!fir.array<10xf32>>{{.*}})
!$acc host_data use_device(a) if(.false.)
a = 1.0
diff --git a/flang/test/Lower/OpenACC/acc-routine.f90 b/flang/test/Lower/OpenACC/acc-routine.f90
index 789f3a57e1f7..1a63b4120235 100644
--- a/flang/test/Lower/OpenACC/acc-routine.f90
+++ b/flang/test/Lower/OpenACC/acc-routine.f90
@@ -2,13 +2,14 @@
! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
-! CHECK: acc.routine @[[r14:.*]] func(@_QPacc_routine19) bind("_QPacc_routine17" [#acc.device_type<host>], "_QPacc_routine17" [#acc.device_type<default>], "_QPacc_routine16" [#acc.device_type<multicore>])
-! CHECK: acc.routine @[[r13:.*]] func(@_QPacc_routine18) bind("_QPacc_routine17" [#acc.device_type<host>], "_QPacc_routine16" [#acc.device_type<multicore>])
+! CHECK: acc.routine @[[r14:.*]] func(@_QPacc_routine19) bind(@_QPacc_routine17 [#acc.device_type<host>], @_QPacc_routine17
+! [#acc.device_type<default>], @_QPacc_routine16 [#acc.device_type<multicore>])
+! CHECK: acc.routine @[[r13:.*]] func(@_QPacc_routine18) bind(@_QPacc_routine17 [#acc.device_type<host>], @_QPacc_routine16 [#acc.device_type<multicore>])
! CHECK: acc.routine @[[r12:.*]] func(@_QPacc_routine17) worker ([#acc.device_type<host>]) vector ([#acc.device_type<multicore>])
! CHECK: acc.routine @[[r11:.*]] func(@_QPacc_routine16) gang([#acc.device_type<nvidia>]) seq ([#acc.device_type<host>])
! CHECK: acc.routine @[[r10:.*]] func(@_QPacc_routine11) seq
! CHECK: acc.routine @[[r09:.*]] func(@_QPacc_routine10) seq
-! CHECK: acc.routine @[[r08:.*]] func(@_QPacc_routine9) bind("_QPacc_routine9a")
+! CHECK: acc.routine @[[r08:.*]] func(@_QPacc_routine9) bind(@_QPacc_routine9a)
! CHECK: acc.routine @[[r07:.*]] func(@_QPacc_routine8) bind("routine8_")
! CHECK: acc.routine @[[r06:.*]] func(@_QPacc_routine7) gang(dim: 1 : i64)
! CHECK: acc.routine @[[r05:.*]] func(@_QPacc_routine6) nohost
diff --git a/flang/test/Lower/OpenACC/acc-routine03.f90 b/flang/test/Lower/OpenACC/acc-routine03.f90
index 85e4ef580f98..ddd6bda0367e 100644
--- a/flang/test/Lower/OpenACC/acc-routine03.f90
+++ b/flang/test/Lower/OpenACC/acc-routine03.f90
@@ -30,6 +30,6 @@ end interface
end subroutine
! CHECK: acc.routine @acc_routine_1 func(@_QPsub2) worker nohost
-! CHECK: acc.routine @acc_routine_0 func(@_QPsub1) bind("_QPsub2") worker
+! CHECK: acc.routine @acc_routine_0 func(@_QPsub1) bind(@_QPsub2) worker
! CHECK: func.func @_QPsub1(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "a"}) attributes {acc.routine_info = #acc.routine_info<[@acc_routine_0]>}
! CHECK: func.func @_QPsub2(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "a"}) attributes {acc.routine_info = #acc.routine_info<[@acc_routine_1]>}
diff --git a/flang/test/Lower/OpenACC/acc-use-device.f90 b/flang/test/Lower/OpenACC/acc-use-device.f90
new file mode 100644
index 000000000000..081a6e317bfc
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-use-device.f90
@@ -0,0 +1,61 @@
+! This test checks whether the OpenACC use_device clause is applied on both results of hlfir.declare.
+
+! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+
+! Test for automatic variable appearing in use_device clause.
+subroutine test()
+ integer :: N = 100
+ real*8 :: b(-1:N)
+! CHECK: %[[A0:.*]] = fir.alloca !fir.array<?xf64>, %{{.*}} {bindc_name = "b", uniq_name = "_QFtestEb"}
+! CHECK: %[[A1:.*]] = fir.shape_shift {{.*}} : (index, index) -> !fir.shapeshift<1>
+! CHECK: %[[A:.*]]:2 = hlfir.declare %[[A0]](%[[A1]]) {uniq_name = "_QFtestEb"} : (!fir.ref<!fir.array<?xf64>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.array<?xf64>>)
+
+ !$acc data copy(b)
+! CHECK: %[[B:.*]] = acc.copyin var(%[[A]]#0 : !fir.box<!fir.array<?xf64>>) -> !fir.box<!fir.array<?xf64>> {dataClause = #acc<data_clause acc_copy>, name = "b"}
+! CHECK: acc.data dataOperands(%[[B]] : !fir.box<!fir.array<?xf64>>) {
+
+ !$acc host_data use_device(b)
+ call vadd(b)
+ !$acc end host_data
+! CHECK: %[[C:.*]] = acc.use_device var(%[[A]]#0 : !fir.box<!fir.array<?xf64>>) -> !fir.box<!fir.array<?xf64>> {name = "b"}
+! CHECK: %[[D:.*]] = acc.use_device varPtr(%[[A]]#1 : !fir.ref<!fir.array<?xf64>>) -> !fir.ref<!fir.array<?xf64>> {name = "b"}
+! CHECK: acc.host_data dataOperands(%[[C]], %[[D]] : !fir.box<!fir.array<?xf64>>, !fir.ref<!fir.array<?xf64>>) {
+! CHECK: fir.call @_QPvadd(%[[A]]#1) fastmath<contract> : (!fir.ref<!fir.array<?xf64>>) -> ()
+ !$acc end data
+! CHECK: acc.copyout accVar(%[[B]] : !fir.box<!fir.array<?xf64>>) to var(%[[A]]#0 : !fir.box<!fir.array<?xf64>>) {dataClause = #acc<data_clause acc_copy>, name = "b"}
+end
+
+! Test for allocatable, pointer and assumed-shape variables appearing in use_device clause.
+subroutine test2(a, b, c)
+ integer :: N = 100
+ real*8, allocatable :: a(:)
+ real*8, target, allocatable :: d(:)
+ real*8 :: b(:)
+ real*8, pointer :: c(:)
+ call allocate(a(N))
+ call allocate(d(N))
+ c => d
+! CHECK: %[[DS:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[E:.*]]:2 = hlfir.declare %arg0 dummy_scope %[[DS]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest2Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+! CHECK: %[[F:.*]]:2 = hlfir.declare %arg1 dummy_scope %[[DS]] {uniq_name = "_QFtest2Eb"} : (!fir.box<!fir.array<?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?xf64>>, !fir.box<!fir.array<?xf64>>)
+! CHECK: %[[G:.*]]:2 = hlfir.declare %arg2 dummy_scope %[[DS]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest2Ec"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf64>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf64>>>>)
+
+ !$acc data copy(a,b,c,d)
+ !$acc host_data use_device(a,b,c)
+ call vadd2(a,b,c)
+ !$acc end host_data
+
+! CHECK: %[[H:.*]] = acc.use_device varPtr(%[[E]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>> {name = "a"}
+! CHECK: %[[I:.*]] = acc.use_device varPtr(%[[E]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>> {name = "a"}
+! CHECK: %[[J:.*]] = acc.use_device var(%[[F]]#0 : !fir.box<!fir.array<?xf64>>) -> !fir.box<!fir.array<?xf64>> {name = "b"}
+! CHECK: %[[K:.*]] = acc.use_device var(%[[F]]#1 : !fir.box<!fir.array<?xf64>>) -> !fir.box<!fir.array<?xf64>> {name = "b"}
+! CHECK: %[[L:.*]] = acc.use_device varPtr(%[[G]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf64>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf64>>>> {name = "c"}
+! CHECK: %[[M:.*]] = acc.use_device varPtr(%[[G]]#1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf64>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf64>>>> {name = "c"}
+! CHECK: acc.host_data dataOperands(%[[H]], %[[I]], %[[J]], %[[K]], %[[L]], %[[M]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.box<!fir.array<?xf64>>, !fir.box<!fir.array<?xf64>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf64>>>>) {
+
+
+
+
+ !$acc end data
+
+end
diff --git a/libc/test/src/math/cospif_test.cpp b/libc/test/src/math/cospif_test.cpp
index cb88bfcade0d..5c30fb7c8718 100644
--- a/libc/test/src/math/cospif_test.cpp
+++ b/libc/test/src/math/cospif_test.cpp
@@ -100,7 +100,7 @@ TEST_F(LlvmLibcCospifTest, SmallValues) {
LIBC_NAMESPACE::cospif(x), 0.5);
}
-// SDCOMP-26094: check sinfpi in the cases for which the range reducer
+// SDCOMP-26094: check cospif in the cases for which the range reducer
// returns values furthest beyond its nominal upper bound of pi/4.
TEST_F(LlvmLibcCospifTest, SDCOMP_26094) {
for (uint32_t v : SDCOMP26094_VALUES) {
diff --git a/libc/test/src/math/sincosf_test.cpp b/libc/test/src/math/sincosf_test.cpp
index ad2155f329cd..4aac1fabfbd6 100644
--- a/libc/test/src/math/sincosf_test.cpp
+++ b/libc/test/src/math/sincosf_test.cpp
@@ -164,7 +164,7 @@ TEST_F(LlvmLibcSinCosfTest, SpecialValues) {
}
}
-// SDCOMP-26094: check sinf in the cases for which the range reducer
+// SDCOMP-26094: check sincosf in the cases for which the range reducer
// returns values furthest beyond its nominal upper bound of pi/4.
TEST_F(LlvmLibcSinCosfTest, SDCOMP_26094) {
for (uint32_t v : SDCOMP26094_VALUES) {
diff --git a/libc/test/src/math/sinpif_test.cpp b/libc/test/src/math/sinpif_test.cpp
index 986c676761f0..94e3dbc4f07d 100644
--- a/libc/test/src/math/sinpif_test.cpp
+++ b/libc/test/src/math/sinpif_test.cpp
@@ -100,7 +100,7 @@ TEST_F(LlvmLibcSinpifTest, SmallValues) {
LIBC_NAMESPACE::sinpif(x), 0.5);
}
-// SDCOMP-26094: check sinfpi in the cases for which the range reducer
+// SDCOMP-26094: check sinpif in the cases for which the range reducer
// returns values furthest beyond its nominal upper bound of pi/4.
TEST_F(LlvmLibcSinpifTest, SDCOMP_26094) {
for (uint32_t v : SDCOMP26094_VALUES) {
diff --git a/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp b/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp
index ef691b77193c..58ebb7be1199 100644
--- a/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp
+++ b/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp
@@ -108,13 +108,21 @@ MinidumpParser::GetThreadContext(const minidump::Thread &td) {
llvm::ArrayRef<uint8_t>
MinidumpParser::GetThreadContextWow64(const minidump::Thread &td) {
+ Log *log = GetLog(LLDBLog::Process);
// On Windows, a 32-bit process can run on a 64-bit machine under WOW64. If
// the minidump was captured with a 64-bit debugger, then the CONTEXT we just
// grabbed from the mini_dump_thread is the one for the 64-bit "native"
// process rather than the 32-bit "guest" process we care about. In this
// case, we can get the 32-bit CONTEXT from the TEB (Thread Environment
// Block) of the 64-bit process.
- auto teb_mem = GetMemory(td.EnvironmentBlock, sizeof(TEB64));
+ auto teb_mem_maybe = GetMemory(td.EnvironmentBlock, sizeof(TEB64));
+ if (!teb_mem_maybe) {
+ LLDB_LOG_ERROR(log, teb_mem_maybe.takeError(),
+ "Failed to read Thread Environment Block: {0}");
+ return {};
+ }
+
+ auto teb_mem = *teb_mem_maybe;
if (teb_mem.empty())
return {};
@@ -126,8 +134,16 @@ MinidumpParser::GetThreadContextWow64(const minidump::Thread &td) {
// Slot 1 of the thread-local storage in the 64-bit TEB points to a structure
// that includes the 32-bit CONTEXT (after a ULONG). See:
// https://msdn.microsoft.com/en-us/library/ms681670.aspx
- auto context =
+ auto context_maybe =
GetMemory(wow64teb->tls_slots[1] + 4, sizeof(MinidumpContext_x86_32));
+ if (!context_maybe) {
+ LLDB_LOG_ERROR(log, context_maybe.takeError(),
+ "Failed to read WOW Thread Context: {0}");
+ return {};
+ }
+
+ auto context = *context_maybe;
+
if (context.size() < sizeof(MinidumpContext_x86_32))
return {};
@@ -478,11 +494,13 @@ void MinidumpParser::PopulateMemoryRanges() {
m_memory_ranges.Sort();
}
-llvm::ArrayRef<uint8_t> MinidumpParser::GetMemory(lldb::addr_t addr,
- size_t size) {
+llvm::Expected<llvm::ArrayRef<uint8_t>>
+MinidumpParser::GetMemory(lldb::addr_t addr, size_t size) {
std::optional<minidump::Range> range = FindMemoryRange(addr);
if (!range)
- return {};
+ return llvm::createStringError(
+ llvm::inconvertibleErrorCode(),
+ "No memory range found for address (0x%" PRIx64 ")", addr);
// There's at least some overlap between the beginning of the desired range
// (addr) and the current range. Figure out where the overlap begins and
@@ -491,7 +509,11 @@ llvm::ArrayRef<uint8_t> MinidumpParser::GetMemory(lldb::addr_t addr,
const size_t offset = addr - range->start;
if (addr < range->start || offset >= range->range_ref.size())
- return {};
+ return llvm::createStringError(
+ llvm::inconvertibleErrorCode(),
+ "Address (0x%" PRIx64 ") is not in range [0x%" PRIx64 " - 0x%" PRIx64
+ ")",
+ addr, range->start, range->start + range->range_ref.size());
const size_t overlap = std::min(size, range->range_ref.size() - offset);
return range->range_ref.slice(offset, overlap);
diff --git a/lldb/source/Plugins/Process/minidump/MinidumpParser.h b/lldb/source/Plugins/Process/minidump/MinidumpParser.h
index 14599f8d572a..3b7d33daca71 100644
--- a/lldb/source/Plugins/Process/minidump/MinidumpParser.h
+++ b/lldb/source/Plugins/Process/minidump/MinidumpParser.h
@@ -104,7 +104,8 @@ public:
std::optional<Range> FindMemoryRange(lldb::addr_t addr);
- llvm::ArrayRef<uint8_t> GetMemory(lldb::addr_t addr, size_t size);
+ llvm::Expected<llvm::ArrayRef<uint8_t>> GetMemory(lldb::addr_t addr,
+ size_t size);
/// Returns a list of memory regions and a flag indicating whether the list is
/// complete (includes all regions mapped into the process memory).
diff --git a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
index ef3c00e2857d..17a421a72274 100644
--- a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
+++ b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
@@ -322,12 +322,15 @@ size_t ProcessMinidump::ReadMemory(lldb::addr_t addr, void *buf, size_t size,
size_t ProcessMinidump::DoReadMemory(lldb::addr_t addr, void *buf, size_t size,
Status &error) {
- llvm::ArrayRef<uint8_t> mem = m_minidump_parser->GetMemory(addr, size);
- if (mem.empty()) {
- error = Status::FromErrorString("could not parse memory info");
+ llvm::Expected<llvm::ArrayRef<uint8_t>> mem_maybe =
+ m_minidump_parser->GetMemory(addr, size);
+ if (!mem_maybe) {
+ error = Status::FromError(mem_maybe.takeError());
return 0;
}
+ llvm::ArrayRef<uint8_t> mem = *mem_maybe;
+
std::memcpy(buf, mem.data(), mem.size());
return mem.size();
}
diff --git a/lldb/test/Shell/Minidump/missing-memory-region.yaml b/lldb/test/Shell/Minidump/missing-memory-region.yaml
new file mode 100644
index 000000000000..1784cacfaf1b
--- /dev/null
+++ b/lldb/test/Shell/Minidump/missing-memory-region.yaml
@@ -0,0 +1,42 @@
+# Check that looking up a memory region not present in the Minidump fails
+# even if it's in the /proc/<pid>/maps file.
+
+# RUN: yaml2obj %s -o %t
+# RUN: %lldb -c %t -o "memory read 0x5000" 2>&1 | FileCheck %s
+
+# CHECK-LABEL: (lldb) memory read 0x5000
+# CHECK-NEXT: error: No memory range found for address (0x5000)
+
+--- !minidump
+Streams:
+ - Type: SystemInfo
+ Processor Arch: AMD64
+ Processor Level: 6
+ Processor Revision: 15876
+ Number of Processors: 40
+ Platform ID: Linux
+ CSD Version: 'Linux 3.13.0-91-generic #138-Ubuntu SMP Fri Jun 24 17:00:34 UTC 2016 x86_64'
+ CPU:
+ Vendor ID: GenuineIntel
+ Version Info: 0x00000000
+ Feature Info: 0x00000000
+ - Type: LinuxProcStatus
+ Text: |
+ Name: test-yaml
+ Umask: 0002
+ State: t (tracing stop)
+ Pid: 8567
+ - Type: LinuxMaps
+ Text: |
+ 0x1000-0x1100 r-xp 00000000 00:00 0
+ 0x2000-0x2200 rw-p 00000000 00:00 0
+ 0x4000-0x6000 rw-- 00000000 00:00 0
+ - Type: Memory64List
+ Memory Ranges:
+ - Start of Memory Range: 0x1000
+ Data Size: 0x100
+ Content : ''
+ - Start of Memory Range: 0x2000
+ Data Size: 0x200
+ Content : ''
+...
diff --git a/lldb/test/Shell/Settings/TestChildCountTruncation.test b/lldb/test/Shell/Settings/TestChildCountTruncation.test
index a96a0d8310ee..da6436cb5ca2 100644
--- a/lldb/test/Shell/Settings/TestChildCountTruncation.test
+++ b/lldb/test/Shell/Settings/TestChildCountTruncation.test
@@ -2,7 +2,7 @@
# when target.max-children-count wasn't explicitly set.
# RUN: split-file %s %t
-# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out
+# RUN: %clang_host -g %t/main.cpp -o %t.out
# RUN: %lldb -x -b -s %t/dwim-commands.input %t.out -o exit 2>&1 \
# RUN: | FileCheck %s --check-prefix=DWIM
#
diff --git a/lldb/unittests/Process/minidump/MinidumpParserTest.cpp b/lldb/unittests/Process/minidump/MinidumpParserTest.cpp
index ee31c8e63644..44f653c6fa13 100644
--- a/lldb/unittests/Process/minidump/MinidumpParserTest.cpp
+++ b/lldb/unittests/Process/minidump/MinidumpParserTest.cpp
@@ -308,16 +308,19 @@ Streams:
)"),
llvm::Succeeded());
- EXPECT_EQ((llvm::ArrayRef<uint8_t>{0x54}), parser->GetMemory(0x401d46, 1));
- EXPECT_EQ((llvm::ArrayRef<uint8_t>{0x54, 0x21}),
- parser->GetMemory(0x401d46, 4));
-
- EXPECT_EQ((llvm::ArrayRef<uint8_t>{0xc8, 0x4d, 0x04, 0xbc, 0xe9}),
- parser->GetMemory(0x7ffceb34a000, 5));
- EXPECT_EQ((llvm::ArrayRef<uint8_t>{0xc8, 0x4d, 0x04}),
- parser->GetMemory(0x7ffceb34a000, 3));
-
- EXPECT_EQ(llvm::ArrayRef<uint8_t>(), parser->GetMemory(0x500000, 512));
+ EXPECT_THAT_EXPECTED(parser->GetMemory(0x401d46, 1),
+ llvm::HasValue(llvm::ArrayRef<uint8_t>{0x54}));
+ EXPECT_THAT_EXPECTED(parser->GetMemory(0x401d46, 4),
+ llvm::HasValue(llvm::ArrayRef<uint8_t>{0x54, 0x21}));
+ EXPECT_THAT_EXPECTED(
+ parser->GetMemory(0x7ffceb34a000, 5),
+ llvm::HasValue(llvm::ArrayRef<uint8_t>{0xc8, 0x4d, 0x04, 0xbc, 0xe9}));
+ EXPECT_THAT_EXPECTED(
+ parser->GetMemory(0x7ffceb34a000, 3),
+ llvm::HasValue(llvm::ArrayRef<uint8_t>{0xc8, 0x4d, 0x04}));
+ EXPECT_THAT_EXPECTED(
+ parser->GetMemory(0x500000, 512),
+ llvm::FailedWithMessage("No memory range found for address (0x500000)"));
}
TEST_F(MinidumpParserTest, FindMemoryRangeWithFullMemoryMinidump) {
diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index a101151eed7c..39fef921a959 100644
--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -530,6 +530,7 @@ private:
bool isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L);
+ Value *tryToReuseLCSSAPhi(const SCEVAddRecExpr *S);
Value *expandAddRecExprLiterally(const SCEVAddRecExpr *);
PHINode *getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
const Loop *L, Type *&TruncTy,
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0e8e4c9618bb..40464e91f9ef 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -609,6 +609,8 @@ namespace {
SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
SDValue foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
SDValue False, ISD::CondCode CC, const SDLoc &DL);
+ SDValue foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
+ SDValue False, ISD::CondCode CC, const SDLoc &DL);
SDValue unfoldMaskedMerge(SDNode *N);
SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
@@ -859,7 +861,7 @@ namespace {
auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
return (LK.first == TargetLoweringBase::TypeLegal ||
LK.first == TargetLoweringBase::TypePromoteInteger) &&
- TLI.isOperationLegal(ISD::UMIN, LK.second);
+ TLI.isOperationLegalOrCustom(ISD::UMIN, LK.second);
}
public:
@@ -4093,6 +4095,26 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return N0;
}
+ // (sub x, ([v]select (ult x, y), 0, y)) -> (umin x, (sub x, y))
+ // (sub x, ([v]select (uge x, y), y, 0)) -> (umin x, (sub x, y))
+ if (N1.hasOneUse() && hasUMin(VT)) {
+ SDValue Y;
+ if (sd_match(N1, m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
+ m_SpecificCondCode(ISD::SETULT)),
+ m_Zero(), m_Deferred(Y))) ||
+ sd_match(N1, m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
+ m_SpecificCondCode(ISD::SETUGE)),
+ m_Deferred(Y), m_Zero())) ||
+ sd_match(N1, m_VSelect(m_SetCC(m_Specific(N0), m_Value(Y),
+ m_SpecificCondCode(ISD::SETULT)),
+ m_Zero(), m_Deferred(Y))) ||
+ sd_match(N1, m_VSelect(m_SetCC(m_Specific(N0), m_Value(Y),
+ m_SpecificCondCode(ISD::SETUGE)),
+ m_Deferred(Y), m_Zero())))
+ return DAG.getNode(ISD::UMIN, DL, VT, N0,
+ DAG.getNode(ISD::SUB, DL, VT, N0, Y));
+ }
+
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -4442,20 +4464,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
sd_match(N1, m_UMaxLike(m_Specific(A), m_Specific(B))))
return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
- // (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y))
- // (sub x, (select (uge x, y), y, 0)) -> (umin x, (sub x, y))
- if (hasUMin(VT)) {
- SDValue Y;
- if (sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
- m_SpecificCondCode(ISD::SETULT)),
- m_Zero(), m_Deferred(Y)))) ||
- sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
- m_SpecificCondCode(ISD::SETUGE)),
- m_Deferred(Y), m_Zero()))))
- return DAG.getNode(ISD::UMIN, DL, VT, N0,
- DAG.getNode(ISD::SUB, DL, VT, N0, Y));
- }
-
return SDValue();
}
@@ -12173,6 +12181,30 @@ SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
return SDValue();
}
+// ([v]select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
+// ([v]select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
+//
+// LHS/RHS are the operands of the select's comparison and CC its condition
+// code; True/False are the two select arms. Returns an empty SDValue when
+// neither form matches or when hasUMin(VT) is false.
+SDValue DAGCombiner::foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
+ SDValue False, ISD::CondCode CC,
+ const SDLoc &DL) {
+ APInt C;
+ EVT VT = True.getValueType();
+ // Only a constant on the right-hand side of the comparison is handled.
+ if (sd_match(RHS, m_ConstInt(C)) && hasUMin(VT)) {
+ if (CC == ISD::SETUGT && LHS == False &&
+ sd_match(True, m_Add(m_Specific(False), m_SpecificInt(~C)))) {
+ // The resulting code relies on an unsigned wrap in ADD.
+ // Recreating ADD to drop possible nuw/nsw flags.
+ SDValue AddC = DAG.getConstant(~C, DL, VT);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, False, AddC);
+ return DAG.getNode(ISD::UMIN, DL, VT, Add, False);
+ }
+ if (CC == ISD::SETULT && LHS == True &&
+ sd_match(False, m_Add(m_Specific(True), m_SpecificInt(-C)))) {
+ // Ditto: rebuild the ADD instead of reusing the matched node.
+ SDValue AddC = DAG.getConstant(-C, DL, VT);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, True, AddC);
+ return DAG.getNode(ISD::UMIN, DL, VT, True, Add);
+ }
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -12358,24 +12390,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// (select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
// (select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
- APInt C;
- if (sd_match(Cond1, m_ConstInt(C)) && hasUMin(VT)) {
- if (CC == ISD::SETUGT && Cond0 == N2 &&
- sd_match(N1, m_Add(m_Specific(N2), m_SpecificInt(~C)))) {
- // The resulting code relies on an unsigned wrap in ADD.
- // Recreating ADD to drop possible nuw/nsw flags.
- SDValue AddC = DAG.getConstant(~C, DL, VT);
- SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N2, AddC);
- return DAG.getNode(ISD::UMIN, DL, VT, Add, N2);
- }
- if (CC == ISD::SETULT && Cond0 == N1 &&
- sd_match(N2, m_Add(m_Specific(N1), m_SpecificInt(-C)))) {
- // Ditto.
- SDValue AddC = DAG.getConstant(-C, DL, VT);
- SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, AddC);
- return DAG.getNode(ISD::UMIN, DL, VT, N1, Add);
- }
- }
+ if (SDValue UMin = foldSelectToUMin(Cond0, Cond1, N1, N2, CC, DL))
+ return UMin;
}
if (!VT.isVector())
@@ -13412,6 +13428,11 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
}
}
}
+
+ // (vselect (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
+ // (vselect (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
+ if (SDValue UMin = foldSelectToUMin(LHS, RHS, N1, N2, CC, DL))
+ return UMin;
}
if (SimplifySelectOps(N, N1, N2))
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 996b0edd2420..bc57537ad5df 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -20,6 +20,7 @@
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CFIInstBuilder.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -35,6 +36,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -7351,6 +7353,9 @@ bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const {
case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
+ case AArch64MachineCombinerPattern::GATHER_LANE_i32:
+ case AArch64MachineCombinerPattern::GATHER_LANE_i16:
+ case AArch64MachineCombinerPattern::GATHER_LANE_i8:
return true;
} // end switch (Pattern)
return false;
@@ -7391,11 +7396,252 @@ static bool getMiscPatterns(MachineInstr &Root,
return false;
}
+/// Match a chain of lane loads with opcode \p LoadLaneOpCode that ends in
+/// \p Root (the load into lane NumLanes - 1), covers lanes 1 .. NumLanes - 2
+/// through the preceding loads, and starts from a SUBREG_TO_REG providing
+/// lane 0. On success the GATHER_LANE_* pattern corresponding to \p NumLanes
+/// is appended to \p Patterns and true is returned; otherwise returns false
+/// and leaves \p Patterns untouched.
+static bool getGatherPattern(MachineInstr &Root,
+ SmallVectorImpl<unsigned> &Patterns,
+ unsigned LoadLaneOpCode, unsigned NumLanes) {
+ const MachineFunction *MF = Root.getMF();
+
+ // Early exit if optimizing for size.
+ if (MF->getFunction().hasMinSize())
+ return false;
+
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+
+ // The root of the pattern must load into the last lane of the vector.
+ if (Root.getOperand(2).getImm() != NumLanes - 1)
+ return false;
+
+ // Check that there is a load into every lane between lane 0 and the root's
+ // lane. For each load we also want to check that:
+ // 1. It has a single non-debug use (since we will be replacing the virtual
+ // register)
+ // 2. It has exactly four operands, i.e. the addressing mode only uses a
+ // single offset register.
+ auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
+ auto Range = llvm::seq<unsigned>(1, NumLanes - 1);
+ SmallSet<unsigned, 4> RemainingLanes(Range.begin(), Range.end());
+ while (!RemainingLanes.empty() && CurrInstr &&
+ CurrInstr->getOpcode() == LoadLaneOpCode &&
+ MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) &&
+ CurrInstr->getNumOperands() == 4) {
+ RemainingLanes.erase(CurrInstr->getOperand(2).getImm());
+ CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
+ }
+
+ if (!RemainingLanes.empty())
+ return false;
+
+ // Match the SUBREG_TO_REG sequence. The loop above may stop because the last
+ // lane was found while the next definition lookup returned null, so the
+ // pointer has to be checked before it is dereferenced.
+ if (!CurrInstr || CurrInstr->getOpcode() != TargetOpcode::SUBREG_TO_REG)
+ return false;
+
+ // Verify that the subreg to reg loads an integer into the first lane.
+ auto Lane0LoadReg = CurrInstr->getOperand(2).getReg();
+ unsigned SingleLaneSizeInBits = 128 / NumLanes;
+ if (TRI->getRegSizeInBits(Lane0LoadReg, MRI) != SingleLaneSizeInBits)
+ return false;
+
+ // Verify that it also has a single non debug use.
+ if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
+ return false;
+
+ switch (NumLanes) {
+ case 4:
+ Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i32);
+ break;
+ case 8:
+ Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i16);
+ break;
+ case 16:
+ Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i8);
+ break;
+ default:
+ llvm_unreachable("Got bad number of lanes for gather pattern.");
+ }
+
+ return true;
+}
+
+/// Search for patterns where we use LD1 instructions to load into
+/// separate lanes of a 128-bit Neon register. We can increase Memory Level
+/// Parallelism by loading into 2 Neon registers instead.
+///
+/// Dispatches on the opcode of \p Root (LD1i32 / LD1i16 / LD1i8) to
+/// getGatherPattern with the matching lane count (4 / 8 / 16). Returns false
+/// for any other opcode.
+static bool getLoadPatterns(MachineInstr &Root,
+ SmallVectorImpl<unsigned> &Patterns) {
+
+ // The pattern searches for loads into single lanes.
+ switch (Root.getOpcode()) {
+ case AArch64::LD1i32:
+ return getGatherPattern(Root, Patterns, Root.getOpcode(), 4);
+ case AArch64::LD1i16:
+ return getGatherPattern(Root, Patterns, Root.getOpcode(), 8);
+ case AArch64::LD1i8:
+ return getGatherPattern(Root, Patterns, Root.getOpcode(), 16);
+ default:
+ return false;
+ }
+}
+
+/// Build the replacement sequence for a matched gather-lane chain rooted at
+/// \p Root: the lower half of the lanes keeps being loaded into the original
+/// SUBREG_TO_REG result, the upper half is loaded into a second, freshly
+/// created register, and a ZIP1v2i64 writing Root's destination register
+/// combines the two. New instructions are appended to \p InsInstrs (with their
+/// defined virtual registers recorded in \p InstrIdxForVirtReg) and replaced
+/// loads are appended to \p DelInstrs. \p Pattern is not read in this body.
+static void
+generateGatherPattern(MachineInstr &Root,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<Register, unsigned> &InstrIdxForVirtReg,
+ unsigned Pattern, unsigned NumLanes) {
+
+ MachineFunction &MF = *Root.getParent()->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+
+ // Gather the initial load instructions to build the pattern
+ // (Root plus the NumLanes - 2 loads feeding it through operand 1).
+ SmallVector<MachineInstr *, 16> LoadToLaneInstrs;
+ MachineInstr *CurrInstr = &Root;
+ for (unsigned i = 0; i < NumLanes - 1; ++i) {
+ LoadToLaneInstrs.push_back(CurrInstr);
+ CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
+ }
+
+ // Sort the load instructions according to the lane.
+ // The order is descending so that, once the lane-0 load is appended below,
+ // the reversed view is in ascending lane order.
+ llvm::sort(LoadToLaneInstrs,
+ [](const MachineInstr *A, const MachineInstr *B) {
+ return A->getOperand(2).getImm() > B->getOperand(2).getImm();
+ });
+
+ // After the walk above CurrInstr is the instruction feeding the first lane
+ // load; the instruction defining its operand 2 is treated as the lane-0 load.
+ MachineInstr *SubregToReg = CurrInstr;
+ LoadToLaneInstrs.push_back(
+ MRI.getUniqueVRegDef(SubregToReg->getOperand(2).getReg()));
+ auto LoadToLaneInstrsAscending = llvm::reverse(LoadToLaneInstrs);
+
+ const TargetRegisterClass *FPR128RegClass =
+ MRI.getRegClass(Root.getOperand(0).getReg());
+
+ // Helper: emit a lane load (same opcode as Root) of OffsetRegister into lane
+ // Lane of SrcRegister, defining and returning a new virtual register.
+ auto LoadLaneToRegister = [&](MachineInstr *OriginalInstr,
+ Register SrcRegister, unsigned Lane,
+ Register OffsetRegister) {
+ auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
+ MachineInstrBuilder LoadIndexIntoRegister =
+ BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
+ NewRegister)
+ .addReg(SrcRegister)
+ .addImm(Lane)
+ .addReg(OffsetRegister, getKillRegState(true));
+ InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
+ InsInstrs.push_back(LoadIndexIntoRegister);
+ return NewRegister;
+ };
+
+ // Helper to create load instruction based on opcode
+ // (LDRSui / LDRHui / LDRBui for 4 / 8 / 16 lanes respectively).
+ auto CreateLoadInstruction = [&](unsigned NumLanes, Register DestReg,
+ Register OffsetReg) -> MachineInstrBuilder {
+ unsigned Opcode;
+ switch (NumLanes) {
+ case 4:
+ Opcode = AArch64::LDRSui;
+ break;
+ case 8:
+ Opcode = AArch64::LDRHui;
+ break;
+ case 16:
+ Opcode = AArch64::LDRBui;
+ break;
+ default:
+ llvm_unreachable(
+ "Got unsupported number of lanes in machine-combiner gather pattern");
+ }
+ // Immediate offset load
+ return BuildMI(MF, MIMetadata(Root), TII->get(Opcode), DestReg)
+ .addReg(OffsetReg)
+ .addImm(0); // immediate offset
+ };
+
+ // Load the remaining lanes into register 0.
+ // These are ascending entries 1 .. NumLanes / 2 - 1; entry 0 (the lane-0
+ // load) and the original SUBREG_TO_REG are left in place.
+ auto LanesToLoadToReg0 =
+ llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
+ LoadToLaneInstrsAscending.begin() + NumLanes / 2);
+ auto PrevReg = SubregToReg->getOperand(0).getReg();
+ for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
+ PrevReg = LoadLaneToRegister(LoadInstr, PrevReg, Index + 1,
+ LoadInstr->getOperand(3).getReg());
+ DelInstrs.push_back(LoadInstr);
+ }
+ auto LastLoadReg0 = PrevReg;
+
+ // First load into register 1. Perform a scalar load (LDRSui, LDRHui or
+ // LDRBui, chosen by CreateLoadInstruction) to zero out the upper lanes in
+ // a single instruction.
+ auto Lane0Load = *LoadToLaneInstrsAscending.begin();
+ auto OriginalSplitLoad =
+ *std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
+ auto DestRegForMiddleIndex = MRI.createVirtualRegister(
+ MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
+
+ MachineInstrBuilder MiddleIndexLoadInstr =
+ CreateLoadInstruction(NumLanes, DestRegForMiddleIndex,
+ OriginalSplitLoad->getOperand(3).getReg());
+
+ InstrIdxForVirtReg.insert(
+ std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
+ InsInstrs.push_back(MiddleIndexLoadInstr);
+ DelInstrs.push_back(OriginalSplitLoad);
+
+ // Subreg To Reg instruction for register 1.
+ auto DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
+ unsigned SubregType;
+ switch (NumLanes) {
+ case 4:
+ SubregType = AArch64::ssub;
+ break;
+ case 8:
+ SubregType = AArch64::hsub;
+ break;
+ case 16:
+ SubregType = AArch64::bsub;
+ break;
+ default:
+ llvm_unreachable(
+ "Got invalid NumLanes for machine-combiner gather pattern");
+ }
+
+ auto SubRegToRegInstr =
+ BuildMI(MF, MIMetadata(Root), TII->get(SubregToReg->getOpcode()),
+ DestRegForSubregToReg)
+ .addImm(0)
+ .addReg(DestRegForMiddleIndex, getKillRegState(true))
+ .addImm(SubregType);
+ InstrIdxForVirtReg.insert(
+ std::make_pair(DestRegForSubregToReg, InsInstrs.size()));
+ InsInstrs.push_back(SubRegToRegInstr);
+
+ // Load remaining lanes into register 1.
+ // The lanes are renumbered from 1 within the new register (Index + 1).
+ auto LanesToLoadToReg1 =
+ llvm::make_range(LoadToLaneInstrsAscending.begin() + NumLanes / 2 + 1,
+ LoadToLaneInstrsAscending.end());
+ PrevReg = SubRegToRegInstr->getOperand(0).getReg();
+ for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
+ PrevReg = LoadLaneToRegister(LoadInstr, PrevReg, Index + 1,
+ LoadInstr->getOperand(3).getReg());
+ // Index NumLanes / 2 - 2 is the last entry of this range, i.e. the load
+ // with the highest lane number; it is deliberately not added to DelInstrs
+ // here. NOTE(review): presumably this is Root and the caller records it
+ // for deletion - confirm against genAlternativeCodeSequence.
+ if (Index == NumLanes / 2 - 2) {
+ break;
+ }
+ DelInstrs.push_back(LoadInstr);
+ }
+ auto LastLoadReg1 = PrevReg;
+
+ // Create the final zip instruction to combine the results.
+ // It defines Root's original destination register, so no new virtual
+ // register is recorded in InstrIdxForVirtReg for it.
+ MachineInstrBuilder ZipInstr =
+ BuildMI(MF, MIMetadata(Root), TII->get(AArch64::ZIP1v2i64),
+ Root.getOperand(0).getReg())
+ .addReg(LastLoadReg0)
+ .addReg(LastLoadReg1);
+ InsInstrs.push_back(ZipInstr);
+}
+
CombinerObjective
AArch64InstrInfo::getCombinerObjective(unsigned Pattern) const {
switch (Pattern) {
case AArch64MachineCombinerPattern::SUBADD_OP1:
case AArch64MachineCombinerPattern::SUBADD_OP2:
+ case AArch64MachineCombinerPattern::GATHER_LANE_i32:
+ case AArch64MachineCombinerPattern::GATHER_LANE_i16:
+ case AArch64MachineCombinerPattern::GATHER_LANE_i8:
return CombinerObjective::MustReduceDepth;
default:
return TargetInstrInfo::getCombinerObjective(Pattern);
@@ -7425,6 +7671,10 @@ bool AArch64InstrInfo::getMachineCombinerPatterns(
if (getMiscPatterns(Root, Patterns))
return true;
+ // Load patterns
+ if (getLoadPatterns(Root, Patterns))
+ return true;
+
return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
DoRegPressureReduce);
}
@@ -8680,6 +8930,21 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
break;
}
+ case AArch64MachineCombinerPattern::GATHER_LANE_i32: {
+ generateGatherPattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
+ Pattern, 4);
+ break;
+ }
+ case AArch64MachineCombinerPattern::GATHER_LANE_i16: {
+ generateGatherPattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
+ Pattern, 8);
+ break;
+ }
+ case AArch64MachineCombinerPattern::GATHER_LANE_i8: {
+ generateGatherPattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
+ Pattern, 16);
+ break;
+ }
} // end switch (Pattern)
// Record MUL and ADD/SUB for deletion
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 7c255da333e4..02734866e712 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -172,6 +172,10 @@ enum AArch64MachineCombinerPattern : unsigned {
FMULv8i16_indexed_OP2,
FNMADD,
+
+ GATHER_LANE_i32,
+ GATHER_LANE_i16,
+ GATHER_LANE_i8
};
class AArch64InstrInfo final : public AArch64GenInstrInfo {
const AArch64RegisterInfo RI;
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index 8fb6ccaac2c9..0d4f24172b57 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -69,6 +69,39 @@ static const Intrinsic::ID ScalableVlsegIntrIds[] = {
Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
Intrinsic::riscv_vlseg8_mask};
+// Masked segment-store intrinsics riscv_seg2_store_mask through
+// riscv_seg8_store_mask, in increasing order of segment count.
+static const Intrinsic::ID FixedVssegIntrIds[] = {
+ Intrinsic::riscv_seg2_store_mask, Intrinsic::riscv_seg3_store_mask,
+ Intrinsic::riscv_seg4_store_mask, Intrinsic::riscv_seg5_store_mask,
+ Intrinsic::riscv_seg6_store_mask, Intrinsic::riscv_seg7_store_mask,
+ Intrinsic::riscv_seg8_store_mask};
+
+// Masked vsseg intrinsics riscv_vsseg2_mask through riscv_vsseg8_mask, in
+// increasing order of segment count.
+static const Intrinsic::ID ScalableVssegIntrIds[] = {
+ Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
+ Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
+ Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
+ Intrinsic::riscv_vsseg8_mask};
+
+/// Return true if \p V can be shown to be a multiple of \p N. This is a
+/// conservative check: a false result only means none of the recognized
+/// patterns matched.
+static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
+ // N must be non-zero.
+ assert(N);
+ if (N == 1)
+ return true;
+
+ using namespace PatternMatch;
+ // Right now we're only recognizing the simplest pattern.
+ // V is either a constant C or a no-unsigned-wrap multiply by a constant C,
+ // where C is non-zero and divisible by N.
+ uint64_t C;
+ if (match(V, m_CombineOr(m_ConstantInt(C),
+ m_NUWMul(m_Value(), m_ConstantInt(C)))) &&
+ C && C % N == 0)
+ return true;
+
+ // For a power-of-two N, enough known trailing zero bits also prove
+ // divisibility.
+ if (isPowerOf2_32(N)) {
+ KnownBits KB = llvm::computeKnownBits(V, DL);
+ return KB.countMinTrailingZeros() >= Log2_32(N);
+ }
+
+ return false;
+}
+
/// Lower an interleaved load into a vlsegN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
@@ -134,18 +167,6 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
return true;
}
-static const Intrinsic::ID FixedVssegIntrIds[] = {
- Intrinsic::riscv_seg2_store_mask, Intrinsic::riscv_seg3_store_mask,
- Intrinsic::riscv_seg4_store_mask, Intrinsic::riscv_seg5_store_mask,
- Intrinsic::riscv_seg6_store_mask, Intrinsic::riscv_seg7_store_mask,
- Intrinsic::riscv_seg8_store_mask};
-
-static const Intrinsic::ID ScalableVssegIntrIds[] = {
- Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
- Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
- Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
- Intrinsic::riscv_vsseg8_mask};
-
/// Lower an interleaved store into a vssegN intrinsic.
///
/// E.g. Lower an interleaved store (Factor = 3):
@@ -235,27 +256,6 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
return true;
}
-static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
- assert(N);
- if (N == 1)
- return true;
-
- using namespace PatternMatch;
- // Right now we're only recognizing the simplest pattern.
- uint64_t C;
- if (match(V, m_CombineOr(m_ConstantInt(C),
- m_NUWMul(m_Value(), m_ConstantInt(C)))) &&
- C && C % N == 0)
- return true;
-
- if (isPowerOf2_32(N)) {
- KnownBits KB = llvm::computeKnownBits(V, DL);
- return KB.countMinTrailingZeros() >= Log2_32(N);
- }
-
- return false;
-}
-
bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
Instruction *Load, Value *Mask, IntrinsicInst *DI) const {
const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 739ac00ba47c..ed08c0bfa2e7 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1223,6 +1223,24 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
return Result;
}
+/// Look for an existing phi in the exit block of S's loop whose SCEV is
+/// exactly \p S and return it; return nullptr if there is none. The search is
+/// only attempted when the loop has a single exit block, that block has a
+/// single predecessor, and it dominates the current insertion block.
+Value *SCEVExpander::tryToReuseLCSSAPhi(const SCEVAddRecExpr *S) {
+ const Loop *L = S->getLoop();
+ BasicBlock *EB = L->getExitBlock();
+ if (!EB || !EB->getSinglePredecessor() ||
+ !SE.DT.dominates(EB, Builder.GetInsertBlock()))
+ return nullptr;
+
+ for (auto &PN : EB->phis()) {
+ // Skip phis that cannot be analyzed or whose type differs from S's.
+ if (!SE.isSCEVable(PN.getType()) || PN.getType() != S->getType())
+ continue;
+ auto *ExitV = SE.getSCEV(&PN);
+ // Pointer equality: the phi's SCEV must be the very same expression as S.
+ if (S == ExitV)
+ return &PN;
+ }
+
+ return nullptr;
+}
+
Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// In canonical mode we compute the addrec as an expression of a canonical IV
// using evaluateAtIteration and expand the resulting SCEV expression. This
@@ -1262,6 +1280,11 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
return V;
}
+ // If S is expanded outside the defining loop, check if there is a
+ // matching LCSSA phi node for it.
+ if (Value *V = tryToReuseLCSSAPhi(S))
+ return V;
+
// {X,+,F} --> X + {0,+,F}
if (!S->getStart()->isZero()) {
if (isa<PointerType>(S->getType())) {
diff --git a/llvm/test/CodeGen/AArch64/aarch64-combine-gather-lanes.mir b/llvm/test/CodeGen/AArch64/aarch64-combine-gather-lanes.mir
new file mode 100644
index 000000000000..09eb18b0e357
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-combine-gather-lanes.mir
@@ -0,0 +1,364 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -run-pass=machine-combiner -mcpu=neoverse-n2 -mtriple=aarch64-none-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: split_loads_to_fpr128
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2, $x3, $x4
+
+ ; CHECK-LABEL: name: split_loads_to_fpr128
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4
+ ; CHECK-NEXT: [[LD_i32:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], killed [[COPY1]], 0, 1
+ ; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i32]], %subreg.ssub
+ ; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i32 [[FIRST_REG]], 1, killed [[COPY2]]
+ ; CHECK-NEXT: [[LD1_0:%[0-9]+]]:fpr32 = LDRSui [[COPY3]], 0
+ ; CHECK-NEXT: [[SECOND_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD1_0]], %subreg.ssub
+ ; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i32 [[SECOND_REG]], 1, killed [[COPY4]]
+ ; CHECK-NEXT: [[ZIP:%[0-9]+]]:fpr128 = ZIP1v2i64 [[LD0_1]], [[LD1_1]]
+ ; CHECK-NEXT: $q0 = COPY [[ZIP]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:gpr64common = COPY $x0
+ %1:gpr64common = COPY $x1
+ %2:gpr64common = COPY $x2
+ %3:gpr64common = COPY $x3
+ %4:gpr64common = COPY $x4
+ %5:fpr32 = LDRSroX %0, killed %1, 0, 1
+ %6:fpr128 = SUBREG_TO_REG 0, killed %5, %subreg.ssub
+ %7:fpr128 = LD1i32 %6, 1, killed %2
+ %8:fpr128 = LD1i32 %7, 2, killed %3
+ %9:fpr128 = LD1i32 %8, 3, killed %4
+ $q0 = COPY %9
+ RET_ReallyLR implicit $q0
+
+---
+name: split_loads_to_fpr128_ui
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2, $x3, $x4
+
+ ; CHECK-LABEL: name: split_loads_to_fpr128_ui
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4
+ ; CHECK-NEXT: [[LD_i32:%[0-9]+]]:fpr32 = LDRSui [[COPY]], 0
+ ; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i32]], %subreg.ssub
+ ; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i32 [[FIRST_REG]], 1, killed [[COPY1]]
+ ; CHECK-NEXT: [[LD1_0:%[0-9]+]]:fpr32 = LDRSui [[COPY2]], 0
+ ; CHECK-NEXT: [[SECOND_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD1_0]], %subreg.ssub
+ ; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i32 [[SECOND_REG]], 1, killed [[COPY3]]
+ ; CHECK-NEXT: [[ZIP:%[0-9]+]]:fpr128 = ZIP1v2i64 [[LD0_1]], [[LD1_1]]
+ ; CHECK-NEXT: $q0 = COPY [[ZIP]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:gpr64common = COPY $x0
+ %1:gpr64common = COPY $x1
+ %2:gpr64common = COPY $x2
+ %3:gpr64common = COPY $x3
+ %4:gpr64common = COPY $x4
+ %5:fpr32 = LDRSui %0, 0
+ %6:fpr128 = SUBREG_TO_REG 0, killed %5, %subreg.ssub
+ %7:fpr128 = LD1i32 %6, 1, killed %1
+ %8:fpr128 = LD1i32 %7, 2, killed %2
+ %9:fpr128 = LD1i32 %8, 3, killed %3
+ $q0 = COPY %9
+ RET_ReallyLR implicit $q0
+
+---
+name: split_loads_to_fpr128_i16
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8
+
+ ; CHECK-LABEL: name: split_loads_to_fpr128_i16
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64common = COPY $x5
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr64common = COPY $x6
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gpr64common = COPY $x7
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:gpr64common = COPY $x8
+ ; CHECK-NEXT: [[LD_i16:%[0-9]+]]:fpr16 = LDRHroX [[COPY]], killed [[COPY1]], 0, 1
+ ; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i16]], %subreg.hsub
+ ; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i16 [[FIRST_REG]], 1, killed [[COPY2]]
+ ; CHECK-NEXT: [[LD0_2:%[0-9]+]]:fpr128 = LD1i16 [[LD0_1]], 2, killed [[COPY3]]
+ ; CHECK-NEXT: [[LD0_3:%[0-9]+]]:fpr128 = LD1i16 [[LD0_2]], 3, killed [[COPY4]]
+ ; CHECK-NEXT: [[LD1_0:%[0-9]+]]:fpr16 = LDRHui [[COPY5]], 0
+ ; CHECK-NEXT: [[SECOND_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD1_0]], %subreg.hsub
+ ; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i16 [[SECOND_REG]], 1, killed [[COPY6]]
+ ; CHECK-NEXT: [[LD1_2:%[0-9]+]]:fpr128 = LD1i16 [[LD1_1]], 2, killed [[COPY7]]
+ ; CHECK-NEXT: [[LD1_3:%[0-9]+]]:fpr128 = LD1i16 [[LD1_2]], 3, killed [[COPY8]]
+ ; CHECK-NEXT: [[ZIP:%[0-9]+]]:fpr128 = ZIP1v2i64 [[LD0_3]], [[LD1_3]]
+ ; CHECK-NEXT: $q0 = COPY [[ZIP]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:gpr64common = COPY $x0
+ %1:gpr64common = COPY $x1
+ %2:gpr64common = COPY $x2
+ %3:gpr64common = COPY $x3
+ %4:gpr64common = COPY $x4
+ %5:gpr64common = COPY $x5
+ %6:gpr64common = COPY $x6
+ %7:gpr64common = COPY $x7
+ %8:gpr64common = COPY $x8
+ %9:fpr16 = LDRHroX %0, killed %1, 0, 1
+ %10:fpr128 = SUBREG_TO_REG 0, killed %9, %subreg.hsub
+ %11:fpr128 = LD1i16 %10, 1, killed %2
+ %12:fpr128 = LD1i16 %11, 2, killed %3
+ %13:fpr128 = LD1i16 %12, 3, killed %4
+ %14:fpr128 = LD1i16 %13, 4, killed %5
+ %15:fpr128 = LD1i16 %14, 5, killed %6
+ %16:fpr128 = LD1i16 %15, 6, killed %7
+ %17:fpr128 = LD1i16 %16, 7, killed %8
+ $q0 = COPY %17
+ RET_ReallyLR implicit $q0
+
+---
+name: split_loads_to_fpr128_i16_ui
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8
+
+ ; CHECK-LABEL: name: split_loads_to_fpr128_i16_ui
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64common = COPY $x5
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr64common = COPY $x6
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gpr64common = COPY $x7
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:gpr64common = COPY $x8
+ ; CHECK-NEXT: [[LD_i16:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 0
+ ; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i16]], %subreg.hsub
+ ; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i16 [[FIRST_REG]], 1, killed [[COPY1]]
+ ; CHECK-NEXT: [[LD0_2:%[0-9]+]]:fpr128 = LD1i16 [[LD0_1]], 2, killed [[COPY2]]
+ ; CHECK-NEXT: [[LD0_3:%[0-9]+]]:fpr128 = LD1i16 [[LD0_2]], 3, killed [[COPY3]]
+ ; CHECK-NEXT: [[LD1_0:%[0-9]+]]:fpr16 = LDRHui [[COPY4]], 0
+ ; CHECK-NEXT: [[SECOND_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD1_0]], %subreg.hsub
+ ; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i16 [[SECOND_REG]], 1, killed [[COPY5]]
+ ; CHECK-NEXT: [[LD1_2:%[0-9]+]]:fpr128 = LD1i16 [[LD1_1]], 2, killed [[COPY6]]
+ ; CHECK-NEXT: [[LD1_3:%[0-9]+]]:fpr128 = LD1i16 [[LD1_2]], 3, killed [[COPY7]]
+ ; CHECK-NEXT: [[ZIP:%[0-9]+]]:fpr128 = ZIP1v2i64 [[LD0_3]], [[LD1_3]]
+ ; CHECK-NEXT: $q0 = COPY [[ZIP]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:gpr64common = COPY $x0
+ %1:gpr64common = COPY $x1
+ %2:gpr64common = COPY $x2
+ %3:gpr64common = COPY $x3
+ %4:gpr64common = COPY $x4
+ %5:gpr64common = COPY $x5
+ %6:gpr64common = COPY $x6
+ %7:gpr64common = COPY $x7
+ %8:gpr64common = COPY $x8
+ %9:fpr16 = LDRHui %0, 0
+ %10:fpr128 = SUBREG_TO_REG 0, killed %9, %subreg.hsub
+ %11:fpr128 = LD1i16 %10, 1, killed %1
+ %12:fpr128 = LD1i16 %11, 2, killed %2
+ %13:fpr128 = LD1i16 %12, 3, killed %3
+ %14:fpr128 = LD1i16 %13, 4, killed %4
+ %15:fpr128 = LD1i16 %14, 5, killed %5
+ %16:fpr128 = LD1i16 %15, 6, killed %6
+ %17:fpr128 = LD1i16 %16, 7, killed %7
+ $q0 = COPY %17
+ RET_ReallyLR implicit $q0
+
+---
+name: split_loads_to_fpr128_i8
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16
+
+ ; CHECK-LABEL: name: split_loads_to_fpr128_i8
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64common = COPY $x5
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr64common = COPY $x6
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gpr64common = COPY $x7
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:gpr64common = COPY $x8
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:gpr64common = COPY $x9
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:gpr64common = COPY $x10
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:gpr64common = COPY $x11
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:gpr64common = COPY $x12
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:gpr64common = COPY $x13
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:gpr64common = COPY $x14
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:gpr64common = COPY $x15
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:gpr64common = COPY $x16
+ ; CHECK-NEXT: [[LD_i8:%[0-9]+]]:fpr8 = LDRBroX [[COPY]], killed [[COPY1]], 0, 0
+ ; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i8]], %subreg.bsub
+ ; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i8 [[FIRST_REG]], 1, killed [[COPY2]]
+ ; CHECK-NEXT: [[LD0_2:%[0-9]+]]:fpr128 = LD1i8 [[LD0_1]], 2, killed [[COPY3]]
+ ; CHECK-NEXT: [[LD0_3:%[0-9]+]]:fpr128 = LD1i8 [[LD0_2]], 3, killed [[COPY4]]
+ ; CHECK-NEXT: [[LD0_4:%[0-9]+]]:fpr128 = LD1i8 [[LD0_3]], 4, killed [[COPY5]]
+ ; CHECK-NEXT: [[LD0_5:%[0-9]+]]:fpr128 = LD1i8 [[LD0_4]], 5, killed [[COPY6]]
+ ; CHECK-NEXT: [[LD0_6:%[0-9]+]]:fpr128 = LD1i8 [[LD0_5]], 6, killed [[COPY7]]
+ ; CHECK-NEXT: [[LD0_7:%[0-9]+]]:fpr128 = LD1i8 [[LD0_6]], 7, killed [[COPY8]]
+ ; CHECK-NEXT: [[LD1_0:%[0-9]+]]:fpr8 = LDRBui [[COPY9]], 0
+ ; CHECK-NEXT: [[SECOND_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD1_0]], %subreg.bsub
+ ; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i8 [[SECOND_REG]], 1, killed [[COPY10]]
+ ; CHECK-NEXT: [[LD1_2:%[0-9]+]]:fpr128 = LD1i8 [[LD1_1]], 2, killed [[COPY11]]
+ ; CHECK-NEXT: [[LD1_3:%[0-9]+]]:fpr128 = LD1i8 [[LD1_2]], 3, killed [[COPY12]]
+ ; CHECK-NEXT: [[LD1_4:%[0-9]+]]:fpr128 = LD1i8 [[LD1_3]], 4, killed [[COPY13]]
+ ; CHECK-NEXT: [[LD1_5:%[0-9]+]]:fpr128 = LD1i8 [[LD1_4]], 5, killed [[COPY14]]
+ ; CHECK-NEXT: [[LD1_6:%[0-9]+]]:fpr128 = LD1i8 [[LD1_5]], 6, killed [[COPY15]]
+ ; CHECK-NEXT: [[LD1_7:%[0-9]+]]:fpr128 = LD1i8 [[LD1_6]], 7, killed [[COPY16]]
+ ; CHECK-NEXT: [[ZIP:%[0-9]+]]:fpr128 = ZIP1v2i64 [[LD0_7]], [[LD1_7]]
+ ; CHECK-NEXT: $q0 = COPY [[ZIP]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:gpr64common = COPY $x0
+ %1:gpr64common = COPY $x1
+ %2:gpr64common = COPY $x2
+ %3:gpr64common = COPY $x3
+ %4:gpr64common = COPY $x4
+ %5:gpr64common = COPY $x5
+ %6:gpr64common = COPY $x6
+ %7:gpr64common = COPY $x7
+ %8:gpr64common = COPY $x8
+ %9:gpr64common = COPY $x9
+ %10:gpr64common = COPY $x10
+ %11:gpr64common = COPY $x11
+ %12:gpr64common = COPY $x12
+ %13:gpr64common = COPY $x13
+ %14:gpr64common = COPY $x14
+ %15:gpr64common = COPY $x15
+ %16:gpr64common = COPY $x16
+ %17:fpr8 = LDRBroX %0, killed %1, 0, 0
+ %18:fpr128 = SUBREG_TO_REG 0, killed %17, %subreg.bsub
+ %19:fpr128 = LD1i8 %18, 1, killed %2
+ %20:fpr128 = LD1i8 %19, 2, killed %3
+ %21:fpr128 = LD1i8 %20, 3, killed %4
+ %22:fpr128 = LD1i8 %21, 4, killed %5
+ %23:fpr128 = LD1i8 %22, 5, killed %6
+ %24:fpr128 = LD1i8 %23, 6, killed %7
+ %25:fpr128 = LD1i8 %24, 7, killed %8
+ %26:fpr128 = LD1i8 %25, 8, killed %9
+ %27:fpr128 = LD1i8 %26, 9, killed %10
+ %28:fpr128 = LD1i8 %27, 10, killed %11
+ %29:fpr128 = LD1i8 %28, 11, killed %12
+ %30:fpr128 = LD1i8 %29, 12, killed %13
+ %31:fpr128 = LD1i8 %30, 13, killed %14
+ %32:fpr128 = LD1i8 %31, 14, killed %15
+ %33:fpr128 = LD1i8 %32, 15, killed %16
+ $q0 = COPY %33
+ RET_ReallyLR implicit $q0
+
+---
+name: negative_pattern_missing_lanes
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: negative_pattern_missing_lanes
+ ; CHECK: [[LD1:%.*]]:fpr128 = LDRQui $x1, 0
+ ; CHECK-NEXT: [[LD2:%.*]]:fpr128 = LD1i32 [[LD1]]
+
+ %0:gpr64common = COPY $x0
+ %1:fpr128 = LDRQui $x1, 0
+ %2:fpr128 = LD1i32 %1, 3, %0
+ $q0 = COPY %2
+ RET_ReallyLR implicit $q0
+
+---
+name: out_of_order_lanes
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2, $x3, $x4
+
+ ; CHECK-LABEL: name: out_of_order_lanes
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4
+ ; CHECK-NEXT: [[LD_i32:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], killed [[COPY1]], 0, 1
+ ; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i32]], %subreg.ssub
+ ; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i32 [[FIRST_REG]], 1, killed [[COPY3]]
+ ; CHECK-NEXT: [[LD1_0:%[0-9]+]]:fpr32 = LDRSui [[COPY2]], 0
+ ; CHECK-NEXT: [[SECOND_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD1_0]], %subreg.ssub
+ ; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i32 [[SECOND_REG]], 1, killed [[COPY4]]
+ ; CHECK-NEXT: [[ZIP:%[0-9]+]]:fpr128 = ZIP1v2i64 [[LD0_1]], [[LD1_1]]
+ ; CHECK-NEXT: $q0 = COPY [[ZIP]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:gpr64common = COPY $x0
+ %1:gpr64common = COPY $x1
+ %2:gpr64common = COPY $x2
+ %3:gpr64common = COPY $x3
+ %4:gpr64common = COPY $x4
+ %5:fpr32 = LDRSroX %0, killed %1, 0, 1
+ %6:fpr128 = SUBREG_TO_REG 0, killed %5, %subreg.ssub
+ %7:fpr128 = LD1i32 %6, 2, killed %2
+ %8:fpr128 = LD1i32 %7, 1, killed %3
+ %9:fpr128 = LD1i32 %8, 3, killed %4
+ $q0 = COPY %9
+ RET_ReallyLR implicit $q0
+
+---
+name: negative_pattern_no_subreg_to_reg
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2, $x3
+
+ ; CHECK-LABEL: name: negative_pattern_no_subreg_to_reg
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3
+ ; CHECK-NEXT: [[INITIAL_VEC:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0
+ ; CHECK-NEXT: [[LD_LANE_1:%[0-9]+]]:fpr128 = LD1i32 [[INITIAL_VEC]], 1, killed [[COPY1]]
+ ; CHECK-NEXT: [[LD_LANE_2:%[0-9]+]]:fpr128 = LD1i32 [[LD_LANE_1]], 2, killed [[COPY2]]
+ ; CHECK-NEXT: [[LD_LANE_3:%[0-9]+]]:fpr128 = LD1i32 [[LD_LANE_2]], 3, killed [[COPY3]]
+ ; CHECK-NEXT: $q0 = COPY [[LD_LANE_3]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:gpr64common = COPY $x0
+ %1:gpr64common = COPY $x1
+ %2:gpr64common = COPY $x2
+ %3:gpr64common = COPY $x3
+ %4:fpr128 = LDRQui %0, 0
+ %5:fpr128 = LD1i32 %4, 1, killed %1
+ %6:fpr128 = LD1i32 %5, 2, killed %2
+ %7:fpr128 = LD1i32 %6, 3, killed %3
+ $q0 = COPY %7
+ RET_ReallyLR implicit $q0
+
+---
+name: negative_pattern_multiple_users
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2, $x3, $x4
+
+ ; CHECK-LABEL: name: negative_pattern_multiple_users
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4
+ ; CHECK-NEXT: [[LD_i32:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], killed [[COPY1]], 0, 1
+ ; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i32]], %subreg.ssub
+ ; CHECK-NEXT: [[LD_LANE_1:%[0-9]+]]:fpr128 = LD1i32 [[FIRST_REG]], 1, killed [[COPY2]]
+ ; CHECK-NEXT: [[LD_LANE_2:%[0-9]+]]:fpr128 = LD1i32 [[LD_LANE_1]], 2, killed [[COPY3]]
+ ; CHECK-NEXT: [[LD_LANE_3:%[0-9]+]]:fpr128 = LD1i32 [[LD_LANE_2]], 3, killed [[COPY4]]
+ ; CHECK-NEXT: $q0 = COPY [[LD_LANE_3]]
+ ; CHECK-NEXT: $q1 = COPY [[LD_LANE_2]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1
+ %0:gpr64common = COPY $x0
+ %1:gpr64common = COPY $x1
+ %2:gpr64common = COPY $x2
+ %3:gpr64common = COPY $x3
+ %4:gpr64common = COPY $x4
+ %5:fpr32 = LDRSroX %0, killed %1, 0, 1
+ %6:fpr128 = SUBREG_TO_REG 0, killed %5, %subreg.ssub
+ %7:fpr128 = LD1i32 %6, 1, killed %2
+ %8:fpr128 = LD1i32 %7, 2, killed %3
+ %9:fpr128 = LD1i32 %8, 3, killed %4
+ $q0 = COPY %9
+ $q1 = COPY %8
+ RET_ReallyLR implicit $q0, implicit $q1
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll
index 7686740aec30..13434fabefa7 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll
@@ -203,89 +203,93 @@ define <12 x float> @abp90c12(<12 x float> %a, <12 x float> %b, <12 x float> %c)
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1
; CHECK-NEXT: // kill: def $s3 killed $s3 def $q3
-; CHECK-NEXT: ldr s17, [sp, #40]
-; CHECK-NEXT: add x10, sp, #56
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2
+; CHECK-NEXT: ldr s17, [sp, #32]
+; CHECK-NEXT: // kill: def $s5 killed $s5 def $q5
; CHECK-NEXT: add x9, sp, #48
+; CHECK-NEXT: add x10, sp, #64
; CHECK-NEXT: mov v1.s[1], v3.s[0]
-; CHECK-NEXT: ldr s3, [sp, #32]
-; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2
; CHECK-NEXT: mov v0.s[1], v2.s[0]
-; CHECK-NEXT: ld1 { v17.s }[1], [x10]
-; CHECK-NEXT: // kill: def $s5 killed $s5 def $q5
-; CHECK-NEXT: ldr s16, [sp, #8]
; CHECK-NEXT: // kill: def $s4 killed $s4 def $q4
-; CHECK-NEXT: add x10, sp, #24
-; CHECK-NEXT: ld1 { v3.s }[1], [x9]
-; CHECK-NEXT: add x9, sp, #72
-; CHECK-NEXT: // kill: def $s7 killed $s7 def $q7
+; CHECK-NEXT: add x11, sp, #72
+; CHECK-NEXT: ld1 { v17.s }[1], [x9]
+; CHECK-NEXT: ldr s18, [x10]
+; CHECK-NEXT: add x9, sp, #80
+; CHECK-NEXT: add x10, sp, #56
; CHECK-NEXT: // kill: def $s6 killed $s6 def $q6
+; CHECK-NEXT: // kill: def $s7 killed $s7 def $q7
+; CHECK-NEXT: ldr s16, [sp, #8]
+; CHECK-NEXT: ldr s3, [sp, #96]
+; CHECK-NEXT: ld1 { v18.s }[1], [x9]
+; CHECK-NEXT: add x9, sp, #88
; CHECK-NEXT: ldr s2, [sp]
-; CHECK-NEXT: ld1 { v16.s }[1], [x10]
-; CHECK-NEXT: add x10, sp, #112
-; CHECK-NEXT: ldr s20, [sp, #136]
; CHECK-NEXT: mov v1.s[2], v5.s[0]
-; CHECK-NEXT: ld1 { v17.s }[2], [x9]
-; CHECK-NEXT: add x9, sp, #64
-; CHECK-NEXT: ldr s5, [sp, #96]
-; CHECK-NEXT: ld1 { v3.s }[2], [x9]
+; CHECK-NEXT: ldr s5, [sp, #40]
; CHECK-NEXT: mov v0.s[2], v4.s[0]
-; CHECK-NEXT: add x9, sp, #88
-; CHECK-NEXT: ldr s4, [sp, #104]
-; CHECK-NEXT: ldr s19, [sp, #192]
; CHECK-NEXT: ld1 { v5.s }[1], [x10]
-; CHECK-NEXT: add x10, sp, #80
-; CHECK-NEXT: ld1 { v17.s }[3], [x9]
-; CHECK-NEXT: mov v1.s[3], v7.s[0]
-; CHECK-NEXT: add x9, sp, #120
-; CHECK-NEXT: ld1 { v3.s }[3], [x10]
-; CHECK-NEXT: ld1 { v4.s }[1], [x9]
-; CHECK-NEXT: ldr s7, [sp, #128]
+; CHECK-NEXT: ldr s19, [x11]
; CHECK-NEXT: add x10, sp, #144
+; CHECK-NEXT: zip1 v4.2d, v17.2d, v18.2d
+; CHECK-NEXT: add x11, sp, #160
+; CHECK-NEXT: ldr s18, [sp, #136]
+; CHECK-NEXT: ld1 { v19.s }[1], [x9]
; CHECK-NEXT: mov v0.s[3], v6.s[0]
-; CHECK-NEXT: add x9, sp, #16
+; CHECK-NEXT: ldr s6, [sp, #128]
+; CHECK-NEXT: mov v1.s[3], v7.s[0]
+; CHECK-NEXT: add x9, sp, #24
+; CHECK-NEXT: ldr s7, [sp, #104]
+; CHECK-NEXT: ld1 { v16.s }[1], [x9]
+; CHECK-NEXT: add x9, sp, #112
+; CHECK-NEXT: ld1 { v6.s }[1], [x10]
+; CHECK-NEXT: zip1 v5.2d, v5.2d, v19.2d
+; CHECK-NEXT: add x10, sp, #120
+; CHECK-NEXT: ld1 { v3.s }[1], [x9]
; CHECK-NEXT: ld1 { v7.s }[1], [x10]
-; CHECK-NEXT: ld1 { v2.s }[1], [x9]
-; CHECK-NEXT: add x9, sp, #160
-; CHECK-NEXT: fmul v6.4s, v17.4s, v1.4s
-; CHECK-NEXT: fmul v18.4s, v4.4s, v16.4s
-; CHECK-NEXT: fmul v16.4s, v5.4s, v16.4s
-; CHECK-NEXT: fmul v1.4s, v3.4s, v1.4s
-; CHECK-NEXT: add x10, sp, #208
-; CHECK-NEXT: ld1 { v7.s }[2], [x9]
-; CHECK-NEXT: add x9, sp, #152
-; CHECK-NEXT: ld1 { v19.s }[1], [x10]
-; CHECK-NEXT: ld1 { v20.s }[1], [x9]
+; CHECK-NEXT: ldr s17, [x11]
; CHECK-NEXT: add x9, sp, #176
-; CHECK-NEXT: add x10, sp, #184
-; CHECK-NEXT: fneg v6.4s, v6.4s
-; CHECK-NEXT: fneg v18.4s, v18.4s
-; CHECK-NEXT: fmla v16.4s, v2.4s, v4.4s
-; CHECK-NEXT: fmla v1.4s, v0.4s, v17.4s
-; CHECK-NEXT: ld1 { v7.s }[3], [x9]
-; CHECK-NEXT: add x9, sp, #168
-; CHECK-NEXT: ld1 { v20.s }[2], [x9]
-; CHECK-NEXT: ldr s4, [sp, #200]
+; CHECK-NEXT: add x10, sp, #16
+; CHECK-NEXT: add x11, sp, #168
+; CHECK-NEXT: ld1 { v17.s }[1], [x9]
+; CHECK-NEXT: ld1 { v2.s }[1], [x10]
+; CHECK-NEXT: add x9, sp, #152
+; CHECK-NEXT: fmul v19.4s, v5.4s, v1.4s
+; CHECK-NEXT: fmul v20.4s, v7.4s, v16.4s
+; CHECK-NEXT: fmul v16.4s, v3.4s, v16.4s
+; CHECK-NEXT: fmul v1.4s, v4.4s, v1.4s
+; CHECK-NEXT: ld1 { v18.s }[1], [x9]
+; CHECK-NEXT: ldr s21, [x11]
+; CHECK-NEXT: zip1 v6.2d, v6.2d, v17.2d
+; CHECK-NEXT: ldr s17, [sp, #192]
+; CHECK-NEXT: add x9, sp, #184
+; CHECK-NEXT: add x10, sp, #208
+; CHECK-NEXT: ld1 { v21.s }[1], [x9]
; CHECK-NEXT: add x9, sp, #216
-; CHECK-NEXT: fmla v6.4s, v0.4s, v3.4s
-; CHECK-NEXT: fmla v18.4s, v2.4s, v5.4s
-; CHECK-NEXT: ld1 { v4.s }[1], [x9]
-; CHECK-NEXT: fsub v0.4s, v7.4s, v1.4s
-; CHECK-NEXT: fsub v1.4s, v19.4s, v16.4s
-; CHECK-NEXT: ld1 { v20.s }[3], [x10]
-; CHECK-NEXT: fadd v2.4s, v4.4s, v18.4s
-; CHECK-NEXT: fadd v3.4s, v20.4s, v6.4s
+; CHECK-NEXT: fneg v19.4s, v19.4s
+; CHECK-NEXT: fneg v20.4s, v20.4s
+; CHECK-NEXT: fmla v16.4s, v2.4s, v7.4s
+; CHECK-NEXT: fmla v1.4s, v0.4s, v5.4s
+; CHECK-NEXT: ld1 { v17.s }[1], [x10]
+; CHECK-NEXT: ldr s5, [sp, #200]
+; CHECK-NEXT: zip1 v7.2d, v18.2d, v21.2d
+; CHECK-NEXT: ld1 { v5.s }[1], [x9]
+; CHECK-NEXT: fmla v19.4s, v0.4s, v4.4s
+; CHECK-NEXT: fmla v20.4s, v2.4s, v3.4s
+; CHECK-NEXT: fsub v0.4s, v6.4s, v1.4s
+; CHECK-NEXT: fsub v1.4s, v17.4s, v16.4s
+; CHECK-NEXT: fadd v2.4s, v7.4s, v19.4s
+; CHECK-NEXT: fadd v3.4s, v5.4s, v20.4s
; CHECK-NEXT: ext v4.16b, v0.16b, v1.16b, #12
-; CHECK-NEXT: ext v5.16b, v3.16b, v2.16b, #12
-; CHECK-NEXT: trn2 v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: ext v5.16b, v2.16b, v3.16b, #12
+; CHECK-NEXT: trn2 v1.4s, v1.4s, v3.4s
; CHECK-NEXT: ext v4.16b, v0.16b, v4.16b, #12
-; CHECK-NEXT: ext v5.16b, v3.16b, v5.16b, #8
+; CHECK-NEXT: ext v5.16b, v2.16b, v5.16b, #8
; CHECK-NEXT: rev64 v4.4s, v4.4s
-; CHECK-NEXT: trn2 v2.4s, v4.4s, v5.4s
-; CHECK-NEXT: zip2 v4.4s, v0.4s, v3.4s
-; CHECK-NEXT: zip1 v0.4s, v0.4s, v3.4s
-; CHECK-NEXT: ext v1.16b, v2.16b, v1.16b, #8
-; CHECK-NEXT: mov v4.d[1], v2.d[0]
+; CHECK-NEXT: trn2 v3.4s, v4.4s, v5.4s
+; CHECK-NEXT: zip2 v4.4s, v0.4s, v2.4s
+; CHECK-NEXT: zip1 v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: ext v1.16b, v3.16b, v1.16b, #8
+; CHECK-NEXT: mov v4.d[1], v3.d[0]
; CHECK-NEXT: str q0, [x8]
; CHECK-NEXT: stp q4, q1, [x8, #16]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll
index acf15f1bd117..e6f27b95d92c 100644
--- a/llvm/test/CodeGen/AArch64/concat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/concat-vector.ll
@@ -186,8 +186,9 @@ define <16 x i8> @concat_v16s8_v4s8_load(ptr %ptrA, ptr %ptrB, ptr %ptrC, ptr %p
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ld1 { v0.s }[1], [x1]
-; CHECK-NEXT: ld1 { v0.s }[2], [x2]
-; CHECK-NEXT: ld1 { v0.s }[3], [x3]
+; CHECK-NEXT: ldr s1, [x2]
+; CHECK-NEXT: ld1 { v1.s }[1], [x3]
+; CHECK-NEXT: zip1 v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
%A = load <4 x i8>, ptr %ptrA
%B = load <4 x i8>, ptr %ptrB
diff --git a/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
index c6b8e41f9bdf..4906e2e15e51 100644
--- a/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
+++ b/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
@@ -1431,6 +1431,7 @@ define <9 x half> @max_v9f16(<9 x half> %a, <9 x half> %b) {
; FULLFP16-NEXT: add x9, sp, #16
; FULLFP16-NEXT: // kill: def $h3 killed $h3 def $q3
; FULLFP16-NEXT: // kill: def $h4 killed $h4 def $q4
+; FULLFP16-NEXT: add x10, sp, #40
; FULLFP16-NEXT: // kill: def $h5 killed $h5 def $q5
; FULLFP16-NEXT: // kill: def $h6 killed $h6 def $q6
; FULLFP16-NEXT: // kill: def $h7 killed $h7 def $q7
@@ -1439,30 +1440,30 @@ define <9 x half> @max_v9f16(<9 x half> %a, <9 x half> %b) {
; FULLFP16-NEXT: ld1 { v1.h }[1], [x9]
; FULLFP16-NEXT: add x9, sp, #24
; FULLFP16-NEXT: mov v0.h[2], v2.h[0]
-; FULLFP16-NEXT: ldr h2, [sp]
; FULLFP16-NEXT: ld1 { v1.h }[2], [x9]
; FULLFP16-NEXT: add x9, sp, #32
-; FULLFP16-NEXT: fminnm v2.8h, v2.8h, v2.8h
; FULLFP16-NEXT: mov v0.h[3], v3.h[0]
; FULLFP16-NEXT: ld1 { v1.h }[3], [x9]
-; FULLFP16-NEXT: add x9, sp, #40
-; FULLFP16-NEXT: ldr h3, [sp, #72]
-; FULLFP16-NEXT: ld1 { v1.h }[4], [x9]
+; FULLFP16-NEXT: ldr h2, [x10]
; FULLFP16-NEXT: add x9, sp, #48
+; FULLFP16-NEXT: ldr h3, [sp, #72]
+; FULLFP16-NEXT: ld1 { v2.h }[1], [x9]
+; FULLFP16-NEXT: add x9, sp, #56
; FULLFP16-NEXT: fminnm v3.8h, v3.8h, v3.8h
; FULLFP16-NEXT: mov v0.h[4], v4.h[0]
-; FULLFP16-NEXT: ld1 { v1.h }[5], [x9]
-; FULLFP16-NEXT: add x9, sp, #56
-; FULLFP16-NEXT: fmaxnm v2.8h, v2.8h, v3.8h
-; FULLFP16-NEXT: mov v0.h[5], v5.h[0]
-; FULLFP16-NEXT: ld1 { v1.h }[6], [x9]
+; FULLFP16-NEXT: ld1 { v2.h }[2], [x9]
; FULLFP16-NEXT: add x9, sp, #64
-; FULLFP16-NEXT: str h2, [x8, #16]
+; FULLFP16-NEXT: mov v0.h[5], v5.h[0]
+; FULLFP16-NEXT: ld1 { v2.h }[3], [x9]
+; FULLFP16-NEXT: zip1 v1.2d, v1.2d, v2.2d
+; FULLFP16-NEXT: ldr h2, [sp]
; FULLFP16-NEXT: mov v0.h[6], v6.h[0]
-; FULLFP16-NEXT: ld1 { v1.h }[7], [x9]
+; FULLFP16-NEXT: fminnm v2.8h, v2.8h, v2.8h
; FULLFP16-NEXT: fminnm v1.8h, v1.8h, v1.8h
; FULLFP16-NEXT: mov v0.h[7], v7.h[0]
+; FULLFP16-NEXT: fmaxnm v2.8h, v2.8h, v3.8h
; FULLFP16-NEXT: fminnm v0.8h, v0.8h, v0.8h
+; FULLFP16-NEXT: str h2, [x8, #16]
; FULLFP16-NEXT: fmaxnm v0.8h, v0.8h, v1.8h
; FULLFP16-NEXT: str q0, [x8]
; FULLFP16-NEXT: ret
@@ -2012,6 +2013,7 @@ define <9 x half> @min_v9f16(<9 x half> %a, <9 x half> %b) {
; FULLFP16-NEXT: add x9, sp, #16
; FULLFP16-NEXT: // kill: def $h3 killed $h3 def $q3
; FULLFP16-NEXT: // kill: def $h4 killed $h4 def $q4
+; FULLFP16-NEXT: add x10, sp, #40
; FULLFP16-NEXT: // kill: def $h5 killed $h5 def $q5
; FULLFP16-NEXT: // kill: def $h6 killed $h6 def $q6
; FULLFP16-NEXT: // kill: def $h7 killed $h7 def $q7
@@ -2020,30 +2022,30 @@ define <9 x half> @min_v9f16(<9 x half> %a, <9 x half> %b) {
; FULLFP16-NEXT: ld1 { v1.h }[1], [x9]
; FULLFP16-NEXT: add x9, sp, #24
; FULLFP16-NEXT: mov v0.h[2], v2.h[0]
-; FULLFP16-NEXT: ldr h2, [sp]
; FULLFP16-NEXT: ld1 { v1.h }[2], [x9]
; FULLFP16-NEXT: add x9, sp, #32
-; FULLFP16-NEXT: fminnm v2.8h, v2.8h, v2.8h
; FULLFP16-NEXT: mov v0.h[3], v3.h[0]
; FULLFP16-NEXT: ld1 { v1.h }[3], [x9]
-; FULLFP16-NEXT: add x9, sp, #40
-; FULLFP16-NEXT: ldr h3, [sp, #72]
-; FULLFP16-NEXT: ld1 { v1.h }[4], [x9]
+; FULLFP16-NEXT: ldr h2, [x10]
; FULLFP16-NEXT: add x9, sp, #48
+; FULLFP16-NEXT: ldr h3, [sp, #72]
+; FULLFP16-NEXT: ld1 { v2.h }[1], [x9]
+; FULLFP16-NEXT: add x9, sp, #56
; FULLFP16-NEXT: fminnm v3.8h, v3.8h, v3.8h
; FULLFP16-NEXT: mov v0.h[4], v4.h[0]
-; FULLFP16-NEXT: ld1 { v1.h }[5], [x9]
-; FULLFP16-NEXT: add x9, sp, #56
-; FULLFP16-NEXT: fminnm v2.8h, v2.8h, v3.8h
-; FULLFP16-NEXT: mov v0.h[5], v5.h[0]
-; FULLFP16-NEXT: ld1 { v1.h }[6], [x9]
+; FULLFP16-NEXT: ld1 { v2.h }[2], [x9]
; FULLFP16-NEXT: add x9, sp, #64
-; FULLFP16-NEXT: str h2, [x8, #16]
+; FULLFP16-NEXT: mov v0.h[5], v5.h[0]
+; FULLFP16-NEXT: ld1 { v2.h }[3], [x9]
+; FULLFP16-NEXT: zip1 v1.2d, v1.2d, v2.2d
+; FULLFP16-NEXT: ldr h2, [sp]
; FULLFP16-NEXT: mov v0.h[6], v6.h[0]
-; FULLFP16-NEXT: ld1 { v1.h }[7], [x9]
+; FULLFP16-NEXT: fminnm v2.8h, v2.8h, v2.8h
; FULLFP16-NEXT: fminnm v1.8h, v1.8h, v1.8h
; FULLFP16-NEXT: mov v0.h[7], v7.h[0]
+; FULLFP16-NEXT: fminnm v2.8h, v2.8h, v3.8h
; FULLFP16-NEXT: fminnm v0.8h, v0.8h, v0.8h
+; FULLFP16-NEXT: str h2, [x8, #16]
; FULLFP16-NEXT: fminnm v0.8h, v0.8h, v1.8h
; FULLFP16-NEXT: str q0, [x8]
; FULLFP16-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/fsh.ll b/llvm/test/CodeGen/AArch64/fsh.ll
index 4c28c9082402..ae2ef2649102 100644
--- a/llvm/test/CodeGen/AArch64/fsh.ll
+++ b/llvm/test/CodeGen/AArch64/fsh.ll
@@ -2509,87 +2509,88 @@ define <7 x i32> @fshl_v7i32(<7 x i32> %a, <7 x i32> %b, <7 x i32> %c) {
;
; CHECK-GI-LABEL: fshl_v7i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr s3, [sp, #48]
-; CHECK-GI-NEXT: ldr s20, [sp, #56]
-; CHECK-GI-NEXT: add x9, sp, #56
+; CHECK-GI-NEXT: ldr s17, [sp, #48]
+; CHECK-GI-NEXT: add x8, sp, #56
+; CHECK-GI-NEXT: add x9, sp, #64
; CHECK-GI-NEXT: ldr s4, [sp, #48]
-; CHECK-GI-NEXT: ldr s7, [sp, #80]
-; CHECK-GI-NEXT: mov w12, #-1 // =0xffffffff
-; CHECK-GI-NEXT: ldr s21, [sp, #88]
-; CHECK-GI-NEXT: mov v3.s[1], v20.s[0]
-; CHECK-GI-NEXT: fmov s20, w12
-; CHECK-GI-NEXT: ld1 { v4.s }[1], [x9]
-; CHECK-GI-NEXT: ldr s17, [sp]
-; CHECK-GI-NEXT: add x13, sp, #64
-; CHECK-GI-NEXT: mov v7.s[1], v21.s[0]
+; CHECK-GI-NEXT: ldr s21, [sp, #56]
+; CHECK-GI-NEXT: mov w10, #-1 // =0xffffffff
+; CHECK-GI-NEXT: ld1 { v17.s }[1], [x8]
+; CHECK-GI-NEXT: ldr s20, [x9]
+; CHECK-GI-NEXT: add x8, sp, #72
+; CHECK-GI-NEXT: mov v4.s[1], v21.s[0]
; CHECK-GI-NEXT: fmov s21, w7
+; CHECK-GI-NEXT: ldr s6, [sp]
+; CHECK-GI-NEXT: ld1 { v20.s }[1], [x8]
; CHECK-GI-NEXT: ldr s19, [sp, #64]
-; CHECK-GI-NEXT: mov w11, #31 // =0x1f
-; CHECK-GI-NEXT: mov v20.s[1], w12
+; CHECK-GI-NEXT: ldr s7, [sp, #80]
+; CHECK-GI-NEXT: ldr s22, [sp, #88]
+; CHECK-GI-NEXT: mov w9, #31 // =0x1f
+; CHECK-GI-NEXT: mov w11, #1 // =0x1
+; CHECK-GI-NEXT: mov v21.s[1], v6.s[0]
+; CHECK-GI-NEXT: fmov s6, w9
; CHECK-GI-NEXT: ldr s18, [sp, #96]
-; CHECK-GI-NEXT: ld1 { v4.s }[2], [x13]
-; CHECK-GI-NEXT: mov w13, #1 // =0x1
-; CHECK-GI-NEXT: mov v3.s[2], v19.s[0]
-; CHECK-GI-NEXT: mov v21.s[1], v17.s[0]
-; CHECK-GI-NEXT: fmov s17, w11
-; CHECK-GI-NEXT: fmov s19, w13
+; CHECK-GI-NEXT: zip1 v17.2d, v17.2d, v20.2d
+; CHECK-GI-NEXT: fmov s20, w10
+; CHECK-GI-NEXT: mov v7.s[1], v22.s[0]
+; CHECK-GI-NEXT: mov v4.s[2], v19.s[0]
+; CHECK-GI-NEXT: fmov s19, w11
; CHECK-GI-NEXT: fmov s23, w0
-; CHECK-GI-NEXT: fmov s24, w11
-; CHECK-GI-NEXT: ldr s6, [sp, #8]
+; CHECK-GI-NEXT: mov v6.s[1], w9
+; CHECK-GI-NEXT: fmov s24, w9
+; CHECK-GI-NEXT: ldr s2, [sp, #8]
+; CHECK-GI-NEXT: mov v20.s[1], w10
; CHECK-GI-NEXT: ldr s0, [sp, #24]
; CHECK-GI-NEXT: ldr s5, [sp, #32]
+; CHECK-GI-NEXT: mov v19.s[1], w11
; CHECK-GI-NEXT: mov v7.s[2], v18.s[0]
-; CHECK-GI-NEXT: mov v17.s[1], w11
-; CHECK-GI-NEXT: mov v19.s[1], w13
-; CHECK-GI-NEXT: mov v20.s[2], w12
; CHECK-GI-NEXT: ldr s16, [sp, #72]
; CHECK-GI-NEXT: mov v23.s[1], w1
; CHECK-GI-NEXT: ldr s18, [sp, #80]
-; CHECK-GI-NEXT: mov v21.s[2], v6.s[0]
-; CHECK-GI-NEXT: mov v24.s[1], w11
+; CHECK-GI-NEXT: mov v21.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v24.s[1], w9
; CHECK-GI-NEXT: mov v0.s[1], v5.s[0]
-; CHECK-GI-NEXT: fmov s6, w4
-; CHECK-GI-NEXT: add x10, sp, #88
+; CHECK-GI-NEXT: fmov s5, w4
+; CHECK-GI-NEXT: mov v20.s[2], w10
+; CHECK-GI-NEXT: add x8, sp, #88
; CHECK-GI-NEXT: movi v22.4s, #31
-; CHECK-GI-NEXT: mov v3.s[3], v16.s[0]
-; CHECK-GI-NEXT: mov v17.s[2], w11
-; CHECK-GI-NEXT: mov v19.s[2], w13
-; CHECK-GI-NEXT: ldr s2, [sp, #16]
-; CHECK-GI-NEXT: ldr s1, [sp, #40]
-; CHECK-GI-NEXT: ld1 { v18.s }[1], [x10]
-; CHECK-GI-NEXT: eor v5.16b, v7.16b, v20.16b
+; CHECK-GI-NEXT: mov v4.s[3], v16.s[0]
+; CHECK-GI-NEXT: mov v6.s[2], w9
+; CHECK-GI-NEXT: mov v19.s[2], w11
+; CHECK-GI-NEXT: ldr s1, [sp, #16]
+; CHECK-GI-NEXT: ldr s3, [sp, #40]
+; CHECK-GI-NEXT: ld1 { v18.s }[1], [x8]
; CHECK-GI-NEXT: mov v23.s[2], w2
-; CHECK-GI-NEXT: mov v6.s[1], w5
-; CHECK-GI-NEXT: add x8, sp, #72
-; CHECK-GI-NEXT: add x9, sp, #96
-; CHECK-GI-NEXT: mov v21.s[3], v2.s[0]
-; CHECK-GI-NEXT: mov v24.s[2], w11
-; CHECK-GI-NEXT: mov v0.s[2], v1.s[0]
-; CHECK-GI-NEXT: ld1 { v4.s }[3], [x8]
-; CHECK-GI-NEXT: bic v2.16b, v22.16b, v3.16b
-; CHECK-GI-NEXT: ld1 { v18.s }[2], [x9]
-; CHECK-GI-NEXT: and v1.16b, v5.16b, v17.16b
+; CHECK-GI-NEXT: mov v5.s[1], w5
+; CHECK-GI-NEXT: add x8, sp, #96
+; CHECK-GI-NEXT: eor v2.16b, v7.16b, v20.16b
+; CHECK-GI-NEXT: mov v21.s[3], v1.s[0]
+; CHECK-GI-NEXT: mov v24.s[2], w9
+; CHECK-GI-NEXT: mov v0.s[2], v3.s[0]
+; CHECK-GI-NEXT: bic v1.16b, v22.16b, v4.16b
+; CHECK-GI-NEXT: ld1 { v18.s }[2], [x8]
; CHECK-GI-NEXT: neg v3.4s, v19.4s
+; CHECK-GI-NEXT: and v4.16b, v17.16b, v22.16b
+; CHECK-GI-NEXT: and v2.16b, v2.16b, v6.16b
; CHECK-GI-NEXT: mov v23.s[3], w3
-; CHECK-GI-NEXT: mov v6.s[2], w6
-; CHECK-GI-NEXT: and v4.16b, v4.16b, v22.16b
-; CHECK-GI-NEXT: ushr v5.4s, v21.4s, #1
-; CHECK-GI-NEXT: neg v2.4s, v2.4s
-; CHECK-GI-NEXT: and v7.16b, v18.16b, v24.16b
+; CHECK-GI-NEXT: mov v5.s[2], w6
+; CHECK-GI-NEXT: ushr v6.4s, v21.4s, #1
; CHECK-GI-NEXT: neg v1.4s, v1.4s
+; CHECK-GI-NEXT: and v7.16b, v18.16b, v24.16b
; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v3.4s
+; CHECK-GI-NEXT: neg v2.4s, v2.4s
; CHECK-GI-NEXT: ushl v3.4s, v23.4s, v4.4s
-; CHECK-GI-NEXT: ushl v2.4s, v5.4s, v2.4s
-; CHECK-GI-NEXT: ushl v4.4s, v6.4s, v7.4s
-; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: orr v1.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT: ushl v1.4s, v6.4s, v1.4s
+; CHECK-GI-NEXT: ushl v4.4s, v5.4s, v7.4s
+; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: orr v1.16b, v3.16b, v1.16b
; CHECK-GI-NEXT: orr v0.16b, v4.16b, v0.16b
; CHECK-GI-NEXT: mov s2, v1.s[1]
; CHECK-GI-NEXT: mov s3, v1.s[2]
; CHECK-GI-NEXT: mov s4, v1.s[3]
+; CHECK-GI-NEXT: fmov w0, s1
; CHECK-GI-NEXT: mov s5, v0.s[1]
; CHECK-GI-NEXT: mov s6, v0.s[2]
-; CHECK-GI-NEXT: fmov w0, s1
; CHECK-GI-NEXT: fmov w4, s0
; CHECK-GI-NEXT: fmov w1, s2
; CHECK-GI-NEXT: fmov w2, s3
diff --git a/llvm/test/CodeGen/AArch64/llvm.frexp.ll b/llvm/test/CodeGen/AArch64/llvm.frexp.ll
index 2213aa1429db..4e1876db772e 100644
--- a/llvm/test/CodeGen/AArch64/llvm.frexp.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.frexp.ll
@@ -700,13 +700,14 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi
; CHECK-NEXT: ldr s1, [sp, #44]
; CHECK-NEXT: ldr q2, [sp] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-NEXT: ld1 { v1.s }[1], [x19]
; CHECK-NEXT: mov v2.s[3], v0.s[0]
-; CHECK-NEXT: ld1 { v1.s }[2], [x20]
+; CHECK-NEXT: ld1 { v1.s }[1], [x19]
+; CHECK-NEXT: ldr s0, [x20]
+; CHECK-NEXT: ld1 { v0.s }[1], [x21]
; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: mov v0.16b, v2.16b
-; CHECK-NEXT: ld1 { v1.s }[3], [x21]
; CHECK-NEXT: ldp x30, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: zip1 v1.2d, v1.2d, v0.2d
+; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
;
@@ -872,10 +873,11 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: ldr s0, [sp, #28]
; CHECK-NEXT: ld1 { v0.s }[1], [x19]
-; CHECK-NEXT: ld1 { v0.s }[2], [x20]
+; CHECK-NEXT: ldr s1, [x20]
+; CHECK-NEXT: ld1 { v1.s }[1], [x21]
; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ld1 { v0.s }[3], [x21]
; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: zip1 v0.2d, v0.2d, v1.2d
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
index 4f0c4080aa0c..9443004ea434 100644
--- a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
+++ b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
@@ -6810,195 +6810,200 @@ define i32 @test_sdot_v48i8_double_nomla(<48 x i8> %a, <48 x i8> %b, <48 x i8> %
; CHECK-SD-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: .cfi_offset w29, -16
-; CHECK-SD-NEXT: ldr b5, [sp, #208]
+; CHECK-SD-NEXT: ldr b0, [sp, #208]
; CHECK-SD-NEXT: add x8, sp, #216
-; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: add x9, sp, #272
+; CHECK-SD-NEXT: ldr b2, [sp, #80]
; CHECK-SD-NEXT: ldr b4, [sp, #976]
-; CHECK-SD-NEXT: add x9, sp, #984
-; CHECK-SD-NEXT: add x12, sp, #328
-; CHECK-SD-NEXT: ld1 { v5.b }[1], [x8]
-; CHECK-SD-NEXT: add x8, sp, #224
-; CHECK-SD-NEXT: movi v1.16b, #1
-; CHECK-SD-NEXT: mov v0.b[1], w1
-; CHECK-SD-NEXT: ld1 { v4.b }[1], [x9]
-; CHECK-SD-NEXT: movi v3.2d, #0000000000000000
-; CHECK-SD-NEXT: add x11, sp, #992
; CHECK-SD-NEXT: ldr b6, [sp, #720]
-; CHECK-SD-NEXT: ldr b7, [sp, #80]
-; CHECK-SD-NEXT: ld1 { v5.b }[2], [x8]
+; CHECK-SD-NEXT: ld1 { v0.b }[1], [x8]
+; CHECK-SD-NEXT: add x8, sp, #224
+; CHECK-SD-NEXT: fmov s16, w0
+; CHECK-SD-NEXT: ldr b17, [sp, #848]
+; CHECK-SD-NEXT: add x10, sp, #24
+; CHECK-SD-NEXT: movi v19.2d, #0000000000000000
+; CHECK-SD-NEXT: ld1 { v0.b }[2], [x8]
; CHECK-SD-NEXT: add x8, sp, #232
-; CHECK-SD-NEXT: add x13, sp, #88
-; CHECK-SD-NEXT: ld1 { v4.b }[2], [x11]
-; CHECK-SD-NEXT: ld1 { v7.b }[1], [x13]
-; CHECK-SD-NEXT: add x13, sp, #856
-; CHECK-SD-NEXT: mov v0.b[2], w2
-; CHECK-SD-NEXT: add x14, sp, #1008
-; CHECK-SD-NEXT: add x15, sp, #872
-; CHECK-SD-NEXT: ld1 { v5.b }[3], [x8]
+; CHECK-SD-NEXT: mov v16.b[1], w1
+; CHECK-SD-NEXT: ld1 { v0.b }[3], [x8]
; CHECK-SD-NEXT: add x8, sp, #240
-; CHECK-SD-NEXT: add x16, sp, #888
-; CHECK-SD-NEXT: add x10, sp, #16
-; CHECK-SD-NEXT: add x9, sp, #24
-; CHECK-SD-NEXT: add x11, sp, #40
-; CHECK-SD-NEXT: movi v2.2d, #0000000000000000
-; CHECK-SD-NEXT: ld1 { v5.b }[4], [x8]
+; CHECK-SD-NEXT: mov v16.b[2], w2
+; CHECK-SD-NEXT: ld1 { v0.b }[4], [x8]
; CHECK-SD-NEXT: add x8, sp, #248
-; CHECK-SD-NEXT: mov v0.b[3], w3
-; CHECK-SD-NEXT: ld1 { v5.b }[5], [x8]
+; CHECK-SD-NEXT: mov v16.b[3], w3
+; CHECK-SD-NEXT: ld1 { v0.b }[5], [x8]
; CHECK-SD-NEXT: add x8, sp, #256
-; CHECK-SD-NEXT: mov v0.b[4], w4
-; CHECK-SD-NEXT: ld1 { v5.b }[6], [x8]
+; CHECK-SD-NEXT: ld1 { v0.b }[6], [x8]
; CHECK-SD-NEXT: add x8, sp, #264
-; CHECK-SD-NEXT: mov v0.b[5], w5
-; CHECK-SD-NEXT: ld1 { v5.b }[7], [x8]
-; CHECK-SD-NEXT: add x8, sp, #272
-; CHECK-SD-NEXT: ld1 { v5.b }[8], [x8]
+; CHECK-SD-NEXT: mov v16.b[4], w4
+; CHECK-SD-NEXT: ld1 { v0.b }[7], [x8]
+; CHECK-SD-NEXT: ldr b1, [x9]
; CHECK-SD-NEXT: add x8, sp, #280
-; CHECK-SD-NEXT: mov v0.b[6], w6
-; CHECK-SD-NEXT: ld1 { v5.b }[9], [x8]
+; CHECK-SD-NEXT: add x9, sp, #88
+; CHECK-SD-NEXT: mov v16.b[5], w5
+; CHECK-SD-NEXT: ld1 { v1.b }[1], [x8]
; CHECK-SD-NEXT: add x8, sp, #288
-; CHECK-SD-NEXT: mov v0.b[7], w7
-; CHECK-SD-NEXT: ld1 { v5.b }[10], [x8]
+; CHECK-SD-NEXT: ld1 { v1.b }[2], [x8]
; CHECK-SD-NEXT: add x8, sp, #296
-; CHECK-SD-NEXT: ld1 { v0.b }[8], [x10]
-; CHECK-SD-NEXT: add x10, sp, #128
-; CHECK-SD-NEXT: ld1 { v5.b }[11], [x8]
+; CHECK-SD-NEXT: mov v16.b[6], w6
+; CHECK-SD-NEXT: ld1 { v1.b }[3], [x8]
; CHECK-SD-NEXT: add x8, sp, #304
-; CHECK-SD-NEXT: ld1 { v0.b }[9], [x9]
-; CHECK-SD-NEXT: add x9, sp, #136
-; CHECK-SD-NEXT: ld1 { v5.b }[12], [x8]
+; CHECK-SD-NEXT: mov v16.b[7], w7
+; CHECK-SD-NEXT: ld1 { v1.b }[4], [x8]
; CHECK-SD-NEXT: add x8, sp, #312
-; CHECK-SD-NEXT: ld1 { v5.b }[13], [x8]
+; CHECK-SD-NEXT: ld1 { v1.b }[5], [x8]
; CHECK-SD-NEXT: add x8, sp, #320
-; CHECK-SD-NEXT: ld1 { v5.b }[14], [x8]
-; CHECK-SD-NEXT: add x8, sp, #32
-; CHECK-SD-NEXT: ld1 { v0.b }[10], [x8]
-; CHECK-SD-NEXT: add x8, sp, #144
-; CHECK-SD-NEXT: ld1 { v5.b }[15], [x12]
-; CHECK-SD-NEXT: add x12, sp, #728
-; CHECK-SD-NEXT: ld1 { v6.b }[1], [x12]
-; CHECK-SD-NEXT: add x12, sp, #1000
-; CHECK-SD-NEXT: ld1 { v0.b }[11], [x11]
-; CHECK-SD-NEXT: ld1 { v4.b }[3], [x12]
-; CHECK-SD-NEXT: add x12, sp, #736
-; CHECK-SD-NEXT: add x11, sp, #920
-; CHECK-SD-NEXT: sdot v3.4s, v5.16b, v1.16b
-; CHECK-SD-NEXT: ldr b5, [sp, #848]
-; CHECK-SD-NEXT: ld1 { v6.b }[2], [x12]
-; CHECK-SD-NEXT: add x12, sp, #48
-; CHECK-SD-NEXT: ld1 { v5.b }[1], [x13]
-; CHECK-SD-NEXT: add x13, sp, #744
-; CHECK-SD-NEXT: ld1 { v4.b }[4], [x14]
-; CHECK-SD-NEXT: add x14, sp, #96
-; CHECK-SD-NEXT: ld1 { v0.b }[12], [x12]
-; CHECK-SD-NEXT: ld1 { v6.b }[3], [x13]
-; CHECK-SD-NEXT: add x13, sp, #864
-; CHECK-SD-NEXT: ld1 { v7.b }[2], [x14]
-; CHECK-SD-NEXT: add x14, sp, #1016
-; CHECK-SD-NEXT: ld1 { v5.b }[2], [x13]
-; CHECK-SD-NEXT: add x13, sp, #752
-; CHECK-SD-NEXT: ld1 { v4.b }[5], [x14]
-; CHECK-SD-NEXT: add x14, sp, #104
-; CHECK-SD-NEXT: ld1 { v6.b }[4], [x13]
-; CHECK-SD-NEXT: add x13, sp, #1024
-; CHECK-SD-NEXT: ld1 { v7.b }[3], [x14]
-; CHECK-SD-NEXT: ld1 { v5.b }[3], [x15]
-; CHECK-SD-NEXT: add x15, sp, #760
-; CHECK-SD-NEXT: add x14, sp, #112
-; CHECK-SD-NEXT: ld1 { v4.b }[6], [x13]
-; CHECK-SD-NEXT: add x13, sp, #880
-; CHECK-SD-NEXT: ld1 { v6.b }[5], [x15]
-; CHECK-SD-NEXT: add x15, sp, #1032
-; CHECK-SD-NEXT: ld1 { v7.b }[4], [x14]
-; CHECK-SD-NEXT: ld1 { v5.b }[4], [x13]
-; CHECK-SD-NEXT: add x14, sp, #768
-; CHECK-SD-NEXT: add x13, sp, #120
-; CHECK-SD-NEXT: ld1 { v4.b }[7], [x15]
-; CHECK-SD-NEXT: add x15, sp, #1040
-; CHECK-SD-NEXT: ld1 { v6.b }[6], [x14]
-; CHECK-SD-NEXT: ld1 { v7.b }[5], [x13]
-; CHECK-SD-NEXT: add x13, sp, #776
-; CHECK-SD-NEXT: ld1 { v5.b }[5], [x16]
-; CHECK-SD-NEXT: add x14, sp, #1048
-; CHECK-SD-NEXT: ld1 { v4.b }[8], [x15]
-; CHECK-SD-NEXT: add x15, sp, #896
-; CHECK-SD-NEXT: ld1 { v6.b }[7], [x13]
-; CHECK-SD-NEXT: ld1 { v7.b }[6], [x10]
-; CHECK-SD-NEXT: add x10, sp, #784
-; CHECK-SD-NEXT: ld1 { v5.b }[6], [x15]
-; CHECK-SD-NEXT: add x13, sp, #1056
-; CHECK-SD-NEXT: ld1 { v4.b }[9], [x14]
-; CHECK-SD-NEXT: add x14, sp, #904
-; CHECK-SD-NEXT: ld1 { v6.b }[8], [x10]
-; CHECK-SD-NEXT: ld1 { v7.b }[7], [x9]
-; CHECK-SD-NEXT: add x9, sp, #792
-; CHECK-SD-NEXT: ld1 { v5.b }[7], [x14]
-; CHECK-SD-NEXT: add x10, sp, #1064
-; CHECK-SD-NEXT: ld1 { v4.b }[10], [x13]
-; CHECK-SD-NEXT: add x13, sp, #912
-; CHECK-SD-NEXT: ld1 { v6.b }[9], [x9]
-; CHECK-SD-NEXT: ld1 { v7.b }[8], [x8]
-; CHECK-SD-NEXT: add x9, sp, #800
-; CHECK-SD-NEXT: ld1 { v5.b }[8], [x13]
+; CHECK-SD-NEXT: ld1 { v1.b }[6], [x8]
+; CHECK-SD-NEXT: add x8, sp, #328
+; CHECK-SD-NEXT: ld1 { v1.b }[7], [x8]
+; CHECK-SD-NEXT: ld1 { v2.b }[1], [x9]
+; CHECK-SD-NEXT: add x8, sp, #96
+; CHECK-SD-NEXT: add x9, sp, #144
+; CHECK-SD-NEXT: ld1 { v2.b }[2], [x8]
+; CHECK-SD-NEXT: add x8, sp, #104
+; CHECK-SD-NEXT: zip1 v0.2d, v0.2d, v1.2d
+; CHECK-SD-NEXT: movi v1.16b, #1
+; CHECK-SD-NEXT: ld1 { v2.b }[3], [x8]
+; CHECK-SD-NEXT: add x8, sp, #112
+; CHECK-SD-NEXT: ld1 { v2.b }[4], [x8]
+; CHECK-SD-NEXT: add x8, sp, #120
+; CHECK-SD-NEXT: ld1 { v2.b }[5], [x8]
+; CHECK-SD-NEXT: add x8, sp, #128
+; CHECK-SD-NEXT: ld1 { v2.b }[6], [x8]
+; CHECK-SD-NEXT: add x8, sp, #136
+; CHECK-SD-NEXT: ld1 { v2.b }[7], [x8]
+; CHECK-SD-NEXT: ldr b3, [x9]
; CHECK-SD-NEXT: add x8, sp, #152
-; CHECK-SD-NEXT: ld1 { v4.b }[11], [x10]
-; CHECK-SD-NEXT: add x10, sp, #1072
-; CHECK-SD-NEXT: ld1 { v6.b }[10], [x9]
-; CHECK-SD-NEXT: ld1 { v7.b }[9], [x8]
-; CHECK-SD-NEXT: add x9, sp, #808
-; CHECK-SD-NEXT: ld1 { v5.b }[9], [x11]
-; CHECK-SD-NEXT: add x8, sp, #56
-; CHECK-SD-NEXT: ld1 { v4.b }[12], [x10]
-; CHECK-SD-NEXT: add x10, sp, #160
-; CHECK-SD-NEXT: ld1 { v0.b }[13], [x8]
-; CHECK-SD-NEXT: ld1 { v6.b }[11], [x9]
-; CHECK-SD-NEXT: add x9, sp, #928
-; CHECK-SD-NEXT: ld1 { v7.b }[10], [x10]
-; CHECK-SD-NEXT: add x10, sp, #1080
-; CHECK-SD-NEXT: ld1 { v5.b }[10], [x9]
+; CHECK-SD-NEXT: add x9, sp, #984
+; CHECK-SD-NEXT: ld1 { v3.b }[1], [x8]
+; CHECK-SD-NEXT: add x8, sp, #160
+; CHECK-SD-NEXT: ld1 { v3.b }[2], [x8]
+; CHECK-SD-NEXT: add x8, sp, #168
+; CHECK-SD-NEXT: ld1 { v3.b }[3], [x8]
+; CHECK-SD-NEXT: add x8, sp, #176
+; CHECK-SD-NEXT: ld1 { v3.b }[4], [x8]
+; CHECK-SD-NEXT: add x8, sp, #184
+; CHECK-SD-NEXT: ld1 { v3.b }[5], [x8]
+; CHECK-SD-NEXT: add x8, sp, #192
+; CHECK-SD-NEXT: ld1 { v3.b }[6], [x8]
+; CHECK-SD-NEXT: add x8, sp, #200
+; CHECK-SD-NEXT: ld1 { v3.b }[7], [x8]
+; CHECK-SD-NEXT: ld1 { v4.b }[1], [x9]
+; CHECK-SD-NEXT: add x8, sp, #992
+; CHECK-SD-NEXT: add x9, sp, #1040
+; CHECK-SD-NEXT: ld1 { v4.b }[2], [x8]
+; CHECK-SD-NEXT: add x8, sp, #1000
+; CHECK-SD-NEXT: zip1 v2.2d, v2.2d, v3.2d
+; CHECK-SD-NEXT: ld1 { v4.b }[3], [x8]
+; CHECK-SD-NEXT: add x8, sp, #1008
+; CHECK-SD-NEXT: ld1 { v4.b }[4], [x8]
+; CHECK-SD-NEXT: add x8, sp, #1016
+; CHECK-SD-NEXT: ld1 { v4.b }[5], [x8]
+; CHECK-SD-NEXT: add x8, sp, #1024
+; CHECK-SD-NEXT: ld1 { v4.b }[6], [x8]
+; CHECK-SD-NEXT: add x8, sp, #1032
+; CHECK-SD-NEXT: ld1 { v4.b }[7], [x8]
+; CHECK-SD-NEXT: ldr b5, [x9]
+; CHECK-SD-NEXT: add x8, sp, #1048
+; CHECK-SD-NEXT: add x9, sp, #728
+; CHECK-SD-NEXT: ld1 { v5.b }[1], [x8]
+; CHECK-SD-NEXT: add x8, sp, #1056
+; CHECK-SD-NEXT: ld1 { v5.b }[2], [x8]
+; CHECK-SD-NEXT: add x8, sp, #1064
+; CHECK-SD-NEXT: ld1 { v5.b }[3], [x8]
+; CHECK-SD-NEXT: add x8, sp, #1072
+; CHECK-SD-NEXT: ld1 { v5.b }[4], [x8]
+; CHECK-SD-NEXT: add x8, sp, #1080
+; CHECK-SD-NEXT: ld1 { v5.b }[5], [x8]
+; CHECK-SD-NEXT: add x8, sp, #1088
+; CHECK-SD-NEXT: ld1 { v5.b }[6], [x8]
+; CHECK-SD-NEXT: add x8, sp, #1096
+; CHECK-SD-NEXT: ld1 { v5.b }[7], [x8]
+; CHECK-SD-NEXT: ld1 { v6.b }[1], [x9]
+; CHECK-SD-NEXT: add x8, sp, #736
+; CHECK-SD-NEXT: add x9, sp, #784
+; CHECK-SD-NEXT: ld1 { v6.b }[2], [x8]
+; CHECK-SD-NEXT: add x8, sp, #744
+; CHECK-SD-NEXT: zip1 v4.2d, v4.2d, v5.2d
+; CHECK-SD-NEXT: movi v5.2d, #0000000000000000
+; CHECK-SD-NEXT: ld1 { v6.b }[3], [x8]
+; CHECK-SD-NEXT: add x8, sp, #752
+; CHECK-SD-NEXT: sdot v19.4s, v4.16b, v1.16b
+; CHECK-SD-NEXT: sdot v5.4s, v0.16b, v1.16b
+; CHECK-SD-NEXT: ld1 { v6.b }[4], [x8]
+; CHECK-SD-NEXT: add x8, sp, #760
+; CHECK-SD-NEXT: ld1 { v6.b }[5], [x8]
+; CHECK-SD-NEXT: add x8, sp, #768
+; CHECK-SD-NEXT: ld1 { v6.b }[6], [x8]
+; CHECK-SD-NEXT: add x8, sp, #776
+; CHECK-SD-NEXT: ld1 { v6.b }[7], [x8]
+; CHECK-SD-NEXT: ldr b7, [x9]
+; CHECK-SD-NEXT: add x8, sp, #792
+; CHECK-SD-NEXT: add x9, sp, #856
+; CHECK-SD-NEXT: ld1 { v7.b }[1], [x8]
+; CHECK-SD-NEXT: add x8, sp, #800
+; CHECK-SD-NEXT: ld1 { v7.b }[2], [x8]
+; CHECK-SD-NEXT: add x8, sp, #808
+; CHECK-SD-NEXT: ld1 { v7.b }[3], [x8]
; CHECK-SD-NEXT: add x8, sp, #816
-; CHECK-SD-NEXT: ld1 { v4.b }[13], [x10]
-; CHECK-SD-NEXT: add x9, sp, #168
-; CHECK-SD-NEXT: add x10, sp, #176
-; CHECK-SD-NEXT: ld1 { v6.b }[12], [x8]
-; CHECK-SD-NEXT: add x8, sp, #936
-; CHECK-SD-NEXT: ld1 { v7.b }[11], [x9]
-; CHECK-SD-NEXT: add x9, sp, #1088
-; CHECK-SD-NEXT: ld1 { v5.b }[11], [x8]
-; CHECK-SD-NEXT: add x8, sp, #64
-; CHECK-SD-NEXT: ld1 { v4.b }[14], [x9]
-; CHECK-SD-NEXT: add x9, sp, #824
-; CHECK-SD-NEXT: ld1 { v0.b }[14], [x8]
-; CHECK-SD-NEXT: ld1 { v6.b }[13], [x9]
-; CHECK-SD-NEXT: add x9, sp, #944
-; CHECK-SD-NEXT: ld1 { v7.b }[12], [x10]
-; CHECK-SD-NEXT: add x10, sp, #1096
-; CHECK-SD-NEXT: ld1 { v5.b }[12], [x9]
+; CHECK-SD-NEXT: ld1 { v7.b }[4], [x8]
+; CHECK-SD-NEXT: add x8, sp, #824
+; CHECK-SD-NEXT: ld1 { v7.b }[5], [x8]
; CHECK-SD-NEXT: add x8, sp, #832
-; CHECK-SD-NEXT: ld1 { v4.b }[15], [x10]
-; CHECK-SD-NEXT: add x9, sp, #184
-; CHECK-SD-NEXT: add x10, sp, #72
-; CHECK-SD-NEXT: ld1 { v6.b }[14], [x8]
-; CHECK-SD-NEXT: add x8, sp, #952
-; CHECK-SD-NEXT: ld1 { v7.b }[13], [x9]
-; CHECK-SD-NEXT: ld1 { v5.b }[13], [x8]
+; CHECK-SD-NEXT: ld1 { v7.b }[6], [x8]
; CHECK-SD-NEXT: add x8, sp, #840
-; CHECK-SD-NEXT: ld1 { v0.b }[15], [x10]
-; CHECK-SD-NEXT: sdot v2.4s, v4.16b, v1.16b
-; CHECK-SD-NEXT: add x9, sp, #192
-; CHECK-SD-NEXT: ld1 { v6.b }[15], [x8]
+; CHECK-SD-NEXT: ld1 { v7.b }[7], [x8]
+; CHECK-SD-NEXT: ld1 { v17.b }[1], [x9]
+; CHECK-SD-NEXT: add x8, sp, #864
+; CHECK-SD-NEXT: add x9, sp, #16
+; CHECK-SD-NEXT: ld1 { v16.b }[8], [x9]
+; CHECK-SD-NEXT: add x9, sp, #912
+; CHECK-SD-NEXT: ld1 { v17.b }[2], [x8]
+; CHECK-SD-NEXT: add x8, sp, #872
+; CHECK-SD-NEXT: zip1 v0.2d, v6.2d, v7.2d
+; CHECK-SD-NEXT: ld1 { v16.b }[9], [x10]
+; CHECK-SD-NEXT: ld1 { v17.b }[3], [x8]
+; CHECK-SD-NEXT: add x8, sp, #880
+; CHECK-SD-NEXT: sdot v19.4s, v0.16b, v1.16b
+; CHECK-SD-NEXT: ld1 { v17.b }[4], [x8]
+; CHECK-SD-NEXT: add x8, sp, #888
+; CHECK-SD-NEXT: ld1 { v17.b }[5], [x8]
+; CHECK-SD-NEXT: add x8, sp, #896
+; CHECK-SD-NEXT: ld1 { v17.b }[6], [x8]
+; CHECK-SD-NEXT: add x8, sp, #904
+; CHECK-SD-NEXT: ld1 { v17.b }[7], [x8]
+; CHECK-SD-NEXT: ldr b18, [x9]
+; CHECK-SD-NEXT: add x8, sp, #920
+; CHECK-SD-NEXT: ld1 { v18.b }[1], [x8]
+; CHECK-SD-NEXT: add x8, sp, #32
+; CHECK-SD-NEXT: ld1 { v16.b }[10], [x8]
+; CHECK-SD-NEXT: add x8, sp, #928
+; CHECK-SD-NEXT: ld1 { v18.b }[2], [x8]
+; CHECK-SD-NEXT: add x8, sp, #40
+; CHECK-SD-NEXT: ld1 { v16.b }[11], [x8]
+; CHECK-SD-NEXT: add x8, sp, #936
+; CHECK-SD-NEXT: ld1 { v18.b }[3], [x8]
+; CHECK-SD-NEXT: add x8, sp, #48
+; CHECK-SD-NEXT: ld1 { v16.b }[12], [x8]
+; CHECK-SD-NEXT: add x8, sp, #944
+; CHECK-SD-NEXT: ld1 { v18.b }[4], [x8]
+; CHECK-SD-NEXT: add x8, sp, #56
+; CHECK-SD-NEXT: ld1 { v16.b }[13], [x8]
+; CHECK-SD-NEXT: add x8, sp, #952
+; CHECK-SD-NEXT: ld1 { v18.b }[5], [x8]
+; CHECK-SD-NEXT: add x8, sp, #64
+; CHECK-SD-NEXT: ld1 { v16.b }[14], [x8]
; CHECK-SD-NEXT: add x8, sp, #960
-; CHECK-SD-NEXT: ld1 { v7.b }[14], [x9]
-; CHECK-SD-NEXT: ld1 { v5.b }[14], [x8]
-; CHECK-SD-NEXT: sdot v3.4s, v0.16b, v1.16b
-; CHECK-SD-NEXT: add x8, sp, #200
-; CHECK-SD-NEXT: add x9, sp, #968
-; CHECK-SD-NEXT: sdot v2.4s, v6.16b, v1.16b
-; CHECK-SD-NEXT: ld1 { v7.b }[15], [x8]
-; CHECK-SD-NEXT: ld1 { v5.b }[15], [x9]
-; CHECK-SD-NEXT: sdot v3.4s, v7.16b, v1.16b
-; CHECK-SD-NEXT: sdot v2.4s, v5.16b, v1.16b
-; CHECK-SD-NEXT: add v0.4s, v3.4s, v2.4s
+; CHECK-SD-NEXT: ld1 { v18.b }[6], [x8]
+; CHECK-SD-NEXT: add x8, sp, #72
+; CHECK-SD-NEXT: ld1 { v16.b }[15], [x8]
+; CHECK-SD-NEXT: add x8, sp, #968
+; CHECK-SD-NEXT: ld1 { v18.b }[7], [x8]
+; CHECK-SD-NEXT: sdot v5.4s, v16.16b, v1.16b
+; CHECK-SD-NEXT: zip1 v0.2d, v17.2d, v18.2d
+; CHECK-SD-NEXT: sdot v5.4s, v2.16b, v1.16b
+; CHECK-SD-NEXT: sdot v19.4s, v0.16b, v1.16b
+; CHECK-SD-NEXT: add v0.4s, v5.4s, v19.4s
; CHECK-SD-NEXT: addv s0, v0.4s
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/nontemporal.ll b/llvm/test/CodeGen/AArch64/nontemporal.ll
index f8ba150a0405..f7a87ae340a7 100644
--- a/llvm/test/CodeGen/AArch64/nontemporal.ll
+++ b/llvm/test/CodeGen/AArch64/nontemporal.ll
@@ -683,41 +683,43 @@ define void @test_stnp_v17f32(<17 x float> %v, ptr %ptr) {
;
; CHECK-BE-LABEL: test_stnp_v17f32:
; CHECK-BE: // %bb.0: // %entry
-; CHECK-BE-NEXT: // kill: def $s4 killed $s4 def $q4
+; CHECK-BE-NEXT: // kill: def $s1 killed $s1 def $q1
; CHECK-BE-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-BE-NEXT: ldr s16, [sp, #36]
+; CHECK-BE-NEXT: // kill: def $s4 killed $s4 def $q4
; CHECK-BE-NEXT: // kill: def $s5 killed $s5 def $q5
-; CHECK-BE-NEXT: // kill: def $s1 killed $s1 def $q1
-; CHECK-BE-NEXT: ldr s17, [sp, #4]
-; CHECK-BE-NEXT: add x8, sp, #44
-; CHECK-BE-NEXT: mov v4.s[1], v5.s[0]
+; CHECK-BE-NEXT: add x8, sp, #12
+; CHECK-BE-NEXT: add x9, sp, #20
+; CHECK-BE-NEXT: ldr s16, [sp, #36]
; CHECK-BE-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-BE-NEXT: ldr s1, [sp, #4]
+; CHECK-BE-NEXT: mov v4.s[1], v5.s[0]
+; CHECK-BE-NEXT: add x10, sp, #52
; CHECK-BE-NEXT: // kill: def $s6 killed $s6 def $q6
; CHECK-BE-NEXT: // kill: def $s2 killed $s2 def $q2
; CHECK-BE-NEXT: // kill: def $s7 killed $s7 def $q7
; CHECK-BE-NEXT: // kill: def $s3 killed $s3 def $q3
-; CHECK-BE-NEXT: ldr s1, [sp, #68]
-; CHECK-BE-NEXT: ld1 { v16.s }[1], [x8]
-; CHECK-BE-NEXT: add x8, sp, #12
-; CHECK-BE-NEXT: ld1 { v17.s }[1], [x8]
-; CHECK-BE-NEXT: add x8, sp, #52
-; CHECK-BE-NEXT: str s1, [x0, #64]
-; CHECK-BE-NEXT: ld1 { v16.s }[2], [x8]
-; CHECK-BE-NEXT: add x8, sp, #20
+; CHECK-BE-NEXT: ld1 { v1.s }[1], [x8]
+; CHECK-BE-NEXT: ldr s5, [x9]
+; CHECK-BE-NEXT: add x8, sp, #28
+; CHECK-BE-NEXT: add x9, sp, #44
+; CHECK-BE-NEXT: ld1 { v5.s }[1], [x8]
+; CHECK-BE-NEXT: ld1 { v16.s }[1], [x9]
+; CHECK-BE-NEXT: ldr s17, [x10]
+; CHECK-BE-NEXT: add x8, sp, #60
; CHECK-BE-NEXT: mov v4.s[2], v6.s[0]
; CHECK-BE-NEXT: mov v0.s[2], v2.s[0]
-; CHECK-BE-NEXT: ld1 { v17.s }[2], [x8]
-; CHECK-BE-NEXT: add x8, sp, #60
-; CHECK-BE-NEXT: ld1 { v16.s }[3], [x8]
-; CHECK-BE-NEXT: add x8, sp, #28
-; CHECK-BE-NEXT: ld1 { v17.s }[3], [x8]
+; CHECK-BE-NEXT: ld1 { v17.s }[1], [x8]
+; CHECK-BE-NEXT: ldr s2, [sp, #68]
+; CHECK-BE-NEXT: add x8, x0, #32
+; CHECK-BE-NEXT: zip1 v1.2d, v1.2d, v5.2d
+; CHECK-BE-NEXT: add x9, x0, #48
+; CHECK-BE-NEXT: str s2, [x0, #64]
+; CHECK-BE-NEXT: zip1 v5.2d, v16.2d, v17.2d
; CHECK-BE-NEXT: mov v4.s[3], v7.s[0]
-; CHECK-BE-NEXT: add x8, x0, #48
; CHECK-BE-NEXT: mov v0.s[3], v3.s[0]
-; CHECK-BE-NEXT: st1 { v16.4s }, [x8]
-; CHECK-BE-NEXT: add x8, x0, #32
-; CHECK-BE-NEXT: st1 { v17.4s }, [x8]
+; CHECK-BE-NEXT: st1 { v1.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #16
+; CHECK-BE-NEXT: st1 { v5.4s }, [x9]
; CHECK-BE-NEXT: st1 { v4.4s }, [x8]
; CHECK-BE-NEXT: st1 { v0.4s }, [x0]
; CHECK-BE-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index 35b9457fbc1f..9df71cfc96cc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -5712,9 +5712,8 @@ define <8 x i8> @vsub_if_uge_v8i8(<8 x i8> %va, <8 x i8> %vb) {
; CHECK-LABEL: vsub_if_uge_v8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmsltu.vv v0, v8, v9
; CHECK-NEXT: vsub.vv v9, v8, v9
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: vminu.vv v8, v8, v9
; CHECK-NEXT: ret
%cmp = icmp ult <8 x i8> %va, %vb
%select = select <8 x i1> %cmp, <8 x i8> zeroinitializer, <8 x i8> %vb
@@ -5725,9 +5724,9 @@ define <8 x i8> @vsub_if_uge_v8i8(<8 x i8> %va, <8 x i8> %vb) {
define <8 x i8> @vsub_if_uge_swapped_v8i8(<8 x i8> %va, <8 x i8> %vb) {
; CHECK-LABEL: vsub_if_uge_swapped_v8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v9, v8
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vsub.vv v9, v8, v9
+; CHECK-NEXT: vminu.vv v8, v8, v9
; CHECK-NEXT: ret
%cmp = icmp uge <8 x i8> %va, %vb
%select = select <8 x i1> %cmp, <8 x i8> %vb, <8 x i8> zeroinitializer
@@ -5739,9 +5738,8 @@ define <8 x i16> @vsub_if_uge_v8i16(<8 x i16> %va, <8 x i16> %vb) {
; CHECK-LABEL: vsub_if_uge_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vmsltu.vv v0, v8, v9
; CHECK-NEXT: vsub.vv v9, v8, v9
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: vminu.vv v8, v8, v9
; CHECK-NEXT: ret
%cmp = icmp ult <8 x i16> %va, %vb
%select = select <8 x i1> %cmp, <8 x i16> zeroinitializer, <8 x i16> %vb
@@ -5752,9 +5750,9 @@ define <8 x i16> @vsub_if_uge_v8i16(<8 x i16> %va, <8 x i16> %vb) {
define <8 x i16> @vsub_if_uge_swapped_v8i16(<8 x i16> %va, <8 x i16> %vb) {
; CHECK-LABEL: vsub_if_uge_swapped_v8i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v9, v8
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vsub.vv v9, v8, v9
+; CHECK-NEXT: vminu.vv v8, v8, v9
; CHECK-NEXT: ret
%cmp = icmp uge <8 x i16> %va, %vb
%select = select <8 x i1> %cmp, <8 x i16> %vb, <8 x i16> zeroinitializer
@@ -5766,9 +5764,8 @@ define <4 x i32> @vsub_if_uge_v4i32(<4 x i32> %va, <4 x i32> %vb) {
; CHECK-LABEL: vsub_if_uge_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmsltu.vv v0, v8, v9
; CHECK-NEXT: vsub.vv v9, v8, v9
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: vminu.vv v8, v8, v9
; CHECK-NEXT: ret
%cmp = icmp ult <4 x i32> %va, %vb
%select = select <4 x i1> %cmp, <4 x i32> zeroinitializer, <4 x i32> %vb
@@ -5779,9 +5776,9 @@ define <4 x i32> @vsub_if_uge_v4i32(<4 x i32> %va, <4 x i32> %vb) {
define <4 x i32> @vsub_if_uge_swapped_v4i32(<4 x i32> %va, <4 x i32> %vb) {
; CHECK-LABEL: vsub_if_uge_swapped_v4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v9, v8
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsub.vv v9, v8, v9
+; CHECK-NEXT: vminu.vv v8, v8, v9
; CHECK-NEXT: ret
%cmp = icmp uge <4 x i32> %va, %vb
%select = select <4 x i1> %cmp, <4 x i32> %vb, <4 x i32> zeroinitializer
@@ -5793,9 +5790,8 @@ define <2 x i64> @vsub_if_uge_v2i64(<2 x i64> %va, <2 x i64> %vb) {
; CHECK-LABEL: vsub_if_uge_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vmsltu.vv v0, v8, v9
; CHECK-NEXT: vsub.vv v9, v8, v9
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: vminu.vv v8, v8, v9
; CHECK-NEXT: ret
%cmp = icmp ult <2 x i64> %va, %vb
%select = select <2 x i1> %cmp, <2 x i64> zeroinitializer, <2 x i64> %vb
@@ -5806,9 +5802,9 @@ define <2 x i64> @vsub_if_uge_v2i64(<2 x i64> %va, <2 x i64> %vb) {
define <2 x i64> @vsub_if_uge_swapped_v2i64(<2 x i64> %va, <2 x i64> %vb) {
; CHECK-LABEL: vsub_if_uge_swapped_v2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v9, v8
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vsub.vv v9, v8, v9
+; CHECK-NEXT: vminu.vv v8, v8, v9
; CHECK-NEXT: ret
%cmp = icmp uge <2 x i64> %va, %vb
%select = select <2 x i1> %cmp, <2 x i64> %vb, <2 x i64> zeroinitializer
@@ -5819,9 +5815,9 @@ define <2 x i64> @vsub_if_uge_swapped_v2i64(<2 x i64> %va, <2 x i64> %vb) {
define <8 x i8> @sub_if_uge_C_v8i8(<8 x i8> %x) {
; CHECK-LABEL: sub_if_uge_C_v8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vmsgtu.vi v0, v8, 12
-; CHECK-NEXT: vadd.vi v8, v8, -13, v0.t
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vadd.vi v9, v8, -13
+; CHECK-NEXT: vminu.vv v8, v9, v8
; CHECK-NEXT: ret
%cmp = icmp ugt <8 x i8> %x, splat (i8 12)
%sub = add <8 x i8> %x, splat (i8 -13)
@@ -5832,11 +5828,10 @@ define <8 x i8> @sub_if_uge_C_v8i8(<8 x i8> %x) {
define <8 x i16> @sub_if_uge_C_v8i16(<8 x i16> %x) {
; CHECK-LABEL: sub_if_uge_C_v8i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 2000
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT: vmsgtu.vx v0, v8, a0
; CHECK-NEXT: li a0, -2001
-; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vadd.vx v9, v8, a0
+; CHECK-NEXT: vminu.vv v8, v9, v8
; CHECK-NEXT: ret
%cmp = icmp ugt <8 x i16> %x, splat (i16 2000)
%sub = add <8 x i16> %x, splat (i16 -2001)
@@ -5847,13 +5842,11 @@ define <8 x i16> @sub_if_uge_C_v8i16(<8 x i16> %x) {
define <4 x i32> @sub_if_uge_C_v4i32(<4 x i32> %x) {
; CHECK-LABEL: sub_if_uge_C_v4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 16
-; CHECK-NEXT: addi a0, a0, -16
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vmsgtu.vx v0, v8, a0
; CHECK-NEXT: lui a0, 1048560
; CHECK-NEXT: addi a0, a0, 15
-; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vadd.vx v9, v8, a0
+; CHECK-NEXT: vminu.vv v8, v9, v8
; CHECK-NEXT: ret
%cmp = icmp ugt <4 x i32> %x, splat (i32 65520)
%sub = add <4 x i32> %x, splat (i32 -65521)
@@ -5864,14 +5857,11 @@ define <4 x i32> @sub_if_uge_C_v4i32(<4 x i32> %x) {
define <4 x i32> @sub_if_uge_C_swapped_v4i32(<4 x i32> %x) {
; CHECK-LABEL: sub_if_uge_C_swapped_v4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 16
-; CHECK-NEXT: addi a0, a0, -15
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmsltu.vx v0, v8, a0
; CHECK-NEXT: lui a0, 1048560
; CHECK-NEXT: addi a0, a0, 15
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vadd.vx v9, v8, a0
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: vminu.vv v8, v8, v9
; CHECK-NEXT: ret
%cmp = icmp ult <4 x i32> %x, splat (i32 65521)
%sub = add <4 x i32> %x, splat (i32 -65521)
@@ -5883,38 +5873,28 @@ define <2 x i64> @sub_if_uge_C_v2i64(<2 x i64> %x) nounwind {
; RV32-LABEL: sub_if_uge_C_v2i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: li a0, 1
-; RV32-NEXT: lui a1, 172127
-; RV32-NEXT: mv a2, sp
-; RV32-NEXT: addi a1, a1, 512
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: sw a0, 4(sp)
; RV32-NEXT: li a0, -2
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; RV32-NEXT: vlse64.v v9, (a2), zero
; RV32-NEXT: lui a1, 876449
; RV32-NEXT: addi a1, a1, -513
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vmsltu.vv v0, v9, v8
-; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v9, (a0), zero
+; RV32-NEXT: vadd.vv v9, v8, v9
+; RV32-NEXT: vminu.vv v8, v9, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: sub_if_uge_C_v2i64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 2384
-; RV64-NEXT: addi a0, a0, 761
-; RV64-NEXT: slli a0, a0, 9
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; RV64-NEXT: vmsgtu.vx v0, v8, a0
; RV64-NEXT: lui a0, 1048278
; RV64-NEXT: addi a0, a0, -95
; RV64-NEXT: slli a0, a0, 12
; RV64-NEXT: addi a0, a0, -513
-; RV64-NEXT: vadd.vx v8, v8, a0, v0.t
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vadd.vx v9, v8, a0
+; RV64-NEXT: vminu.vv v8, v9, v8
; RV64-NEXT: ret
%cmp = icmp ugt <2 x i64> %x, splat (i64 5000000000)
%sub = add <2 x i64> %x, splat (i64 -5000000001)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 041aae229288..019bbe2908a2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -1718,6 +1718,28 @@ define void @load_factor4_one_active_storeback_full(ptr %ptr) {
ret void
}
+define <4 x i32> @vp_load_factor3_one_active(ptr %ptr) {
+; CHECK-LABEL: vp_load_factor3_one_active:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
+; CHECK-NEXT: ret
+ %interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> splat (i1 true), i32 12)
+ %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+ ret <4 x i32> %v0
+}
+
+define <4 x i32> @vp_load_factor5_one_active(ptr %ptr) {
+; CHECK-LABEL: vp_load_factor5_one_active:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vlseg5e32.v v8, (a0)
+; CHECK-NEXT: ret
+ %interleaved.vec = tail call <20 x i32> @llvm.vp.load.v20i32.p0(ptr %ptr, <20 x i1> splat (i1 true), i32 20)
+ %v0 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 0, i32 5, i32 10, i32 15>
+ ret <4 x i32> %v0
+}
+
define void @store_factor4_one_active(ptr %ptr, <4 x i32> %v) {
; CHECK-LABEL: store_factor4_one_active:
; CHECK: # %bb.0:
@@ -1804,8 +1826,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
; RV32-NEXT: vle32.v v12, (a0), v0.t
; RV32-NEXT: li a0, 36
; RV32-NEXT: vmv.s.x v20, a1
-; RV32-NEXT: lui a1, %hi(.LCPI51_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI51_0)
+; RV32-NEXT: lui a1, %hi(.LCPI53_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI53_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v21, (a1)
; RV32-NEXT: vcompress.vm v8, v12, v11
@@ -1880,8 +1902,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
; RV32-NEXT: vmv.s.x v10, a0
; RV32-NEXT: li a0, 146
; RV32-NEXT: vmv.s.x v11, a0
-; RV32-NEXT: lui a0, %hi(.LCPI52_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI52_0)
+; RV32-NEXT: lui a0, %hi(.LCPI54_0)
+; RV32-NEXT: addi a0, a0, %lo(.LCPI54_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v20, (a0)
; RV32-NEXT: li a0, 36
diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll
index a21a526e00ec..9b58cb3d5c89 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll
@@ -898,9 +898,8 @@ define <vscale x 2 x i8> @vsub_if_uge_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2
; CHECK-LABEL: vsub_if_uge_nxv2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vmsltu.vv v0, v8, v9
; CHECK-NEXT: vsub.vv v9, v8, v9
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: vminu.vv v8, v8, v9
; CHECK-NEXT: ret
%cmp = icmp ult <vscale x 2 x i8> %va, %vb
%select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i8> zeroinitializer, <vscale x 2 x i8> %vb
@@ -911,9 +910,9 @@ define <vscale x 2 x i8> @vsub_if_uge_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2
define <vscale x 2 x i8> @vsub_if_uge_swapped_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
; CHECK-LABEL: vsub_if_uge_swapped_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v9, v8
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vsub.vv v9, v8, v9
+; CHECK-NEXT: vminu.vv v8, v8, v9
; CHECK-NEXT: ret
%cmp = icmp uge <vscale x 2 x i8> %va, %vb
%select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i8> %vb, <vscale x 2 x i8> zeroinitializer
@@ -925,9 +924,8 @@ define <vscale x 2 x i16> @vsub_if_uge_nxv2i16(<vscale x 2 x i16> %va, <vscale x
; CHECK-LABEL: vsub_if_uge_nxv2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vmsltu.vv v0, v8, v9
; CHECK-NEXT: vsub.vv v9, v8, v9
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: vminu.vv v8, v8, v9
; CHECK-NEXT: ret
%cmp = icmp ult <vscale x 2 x i16> %va, %vb
%select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> %vb
@@ -938,9 +936,9 @@ define <vscale x 2 x i16> @vsub_if_uge_nxv2i16(<vscale x 2 x i16> %va, <vscale x
define <vscale x 2 x i16> @vsub_if_uge_swapped_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
; CHECK-LABEL: vsub_if_uge_swapped_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v9, v8
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsub.vv v9, v8, v9
+; CHECK-NEXT: vminu.vv v8, v8, v9
; CHECK-NEXT: ret
%cmp = icmp uge <vscale x 2 x i16> %va, %vb
%select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i16> %vb, <vscale x 2 x i16> zeroinitializer
@@ -952,9 +950,8 @@ define <vscale x 2 x i32> @vsub_if_uge_nxv2i32(<vscale x 2 x i32> %va, <vscale x
; CHECK-LABEL: vsub_if_uge_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmsltu.vv v0, v8, v9
; CHECK-NEXT: vsub.vv v9, v8, v9
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: vminu.vv v8, v8, v9
; CHECK-NEXT: ret
%cmp = icmp ult <vscale x 2 x i32> %va, %vb
%select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> %vb
@@ -965,9 +962,9 @@ define <vscale x 2 x i32> @vsub_if_uge_nxv2i32(<vscale x 2 x i32> %va, <vscale x
define <vscale x 2 x i32> @vsub_if_uge_swapped_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
; CHECK-LABEL: vsub_if_uge_swapped_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v9, v8
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsub.vv v9, v8, v9
+; CHECK-NEXT: vminu.vv v8, v8, v9
; CHECK-NEXT: ret
%cmp = icmp uge <vscale x 2 x i32> %va, %vb
%select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i32> %vb, <vscale x 2 x i32> zeroinitializer
@@ -979,9 +976,8 @@ define <vscale x 2 x i64> @vsub_if_uge_nxv2i64(<vscale x 2 x i64> %va, <vscale x
; CHECK-LABEL: vsub_if_uge_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT: vmsltu.vv v0, v8, v10
; CHECK-NEXT: vsub.vv v10, v8, v10
-; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: vminu.vv v8, v8, v10
; CHECK-NEXT: ret
%cmp = icmp ult <vscale x 2 x i64> %va, %vb
%select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> %vb
@@ -992,9 +988,9 @@ define <vscale x 2 x i64> @vsub_if_uge_nxv2i64(<vscale x 2 x i64> %va, <vscale x
define <vscale x 2 x i64> @vsub_if_uge_swapped_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb) {
; CHECK-LABEL: vsub_if_uge_swapped_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v10, v8
-; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT: vsub.vv v10, v8, v10
+; CHECK-NEXT: vminu.vv v8, v8, v10
; CHECK-NEXT: ret
%cmp = icmp uge <vscale x 2 x i64> %va, %vb
%select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %vb, <vscale x 2 x i64> zeroinitializer
@@ -1005,9 +1001,9 @@ define <vscale x 2 x i64> @vsub_if_uge_swapped_nxv2i64(<vscale x 2 x i64> %va, <
define <vscale x 2 x i8> @sub_if_uge_C_nxv2i8(<vscale x 2 x i8> %x) {
; CHECK-LABEL: sub_if_uge_C_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
-; CHECK-NEXT: vmsgtu.vi v0, v8, 12
-; CHECK-NEXT: vadd.vi v8, v8, -13, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vadd.vi v9, v8, -13
+; CHECK-NEXT: vminu.vv v8, v9, v8
; CHECK-NEXT: ret
%cmp = icmp ugt <vscale x 2 x i8> %x, splat (i8 12)
%sub = add <vscale x 2 x i8> %x, splat (i8 -13)
@@ -1018,11 +1014,10 @@ define <vscale x 2 x i8> @sub_if_uge_C_nxv2i8(<vscale x 2 x i8> %x) {
define <vscale x 2 x i16> @sub_if_uge_C_nxv2i16(<vscale x 2 x i16> %x) {
; CHECK-LABEL: sub_if_uge_C_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 2000
-; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vmsgtu.vx v0, v8, a0
; CHECK-NEXT: li a0, -2001
-; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vadd.vx v9, v8, a0
+; CHECK-NEXT: vminu.vv v8, v9, v8
; CHECK-NEXT: ret
%cmp = icmp ugt <vscale x 2 x i16> %x, splat (i16 2000)
%sub = add <vscale x 2 x i16> %x, splat (i16 -2001)
@@ -1033,13 +1028,11 @@ define <vscale x 2 x i16> @sub_if_uge_C_nxv2i16(<vscale x 2 x i16> %x) {
define <vscale x 2 x i32> @sub_if_uge_C_nxv2i32(<vscale x 2 x i32> %x) {
; CHECK-LABEL: sub_if_uge_C_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 16
-; CHECK-NEXT: addi a0, a0, -16
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu
-; CHECK-NEXT: vmsgtu.vx v0, v8, a0
; CHECK-NEXT: lui a0, 1048560
; CHECK-NEXT: addi a0, a0, 15
-; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vadd.vx v9, v8, a0
+; CHECK-NEXT: vminu.vv v8, v9, v8
; CHECK-NEXT: ret
%cmp = icmp ugt <vscale x 2 x i32> %x, splat (i32 65520)
%sub = add <vscale x 2 x i32> %x, splat (i32 -65521)
@@ -1050,14 +1043,11 @@ define <vscale x 2 x i32> @sub_if_uge_C_nxv2i32(<vscale x 2 x i32> %x) {
define <vscale x 2 x i32> @sub_if_uge_C_swapped_nxv2i32(<vscale x 2 x i32> %x) {
; CHECK-LABEL: sub_if_uge_C_swapped_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 16
-; CHECK-NEXT: addi a0, a0, -15
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmsltu.vx v0, v8, a0
; CHECK-NEXT: lui a0, 1048560
; CHECK-NEXT: addi a0, a0, 15
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vadd.vx v9, v8, a0
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: vminu.vv v8, v8, v9
; CHECK-NEXT: ret
%cmp = icmp ult <vscale x 2 x i32> %x, splat (i32 65521)
%sub = add <vscale x 2 x i32> %x, splat (i32 -65521)
@@ -1069,38 +1059,28 @@ define <vscale x 2 x i64> @sub_if_uge_C_nxv2i64(<vscale x 2 x i64> %x) nounwind
; RV32-LABEL: sub_if_uge_C_nxv2i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: li a0, 1
-; RV32-NEXT: lui a1, 172127
-; RV32-NEXT: mv a2, sp
-; RV32-NEXT: addi a1, a1, 512
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: sw a0, 4(sp)
; RV32-NEXT: li a0, -2
-; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, mu
-; RV32-NEXT: vlse64.v v10, (a2), zero
; RV32-NEXT: lui a1, 876449
; RV32-NEXT: addi a1, a1, -513
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v12, (a0), zero
-; RV32-NEXT: vmsltu.vv v0, v10, v8
-; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
+; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vadd.vv v10, v8, v10
+; RV32-NEXT: vminu.vv v8, v10, v8
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: sub_if_uge_C_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: lui a0, 2384
-; RV64-NEXT: addi a0, a0, 761
-; RV64-NEXT: slli a0, a0, 9
-; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu
-; RV64-NEXT: vmsgtu.vx v0, v8, a0
; RV64-NEXT: lui a0, 1048278
; RV64-NEXT: addi a0, a0, -95
; RV64-NEXT: slli a0, a0, 12
; RV64-NEXT: addi a0, a0, -513
-; RV64-NEXT: vadd.vx v8, v8, a0, v0.t
+; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV64-NEXT: vadd.vx v10, v8, a0
+; RV64-NEXT: vminu.vv v8, v10, v8
; RV64-NEXT: ret
%cmp = icmp ugt <vscale x 2 x i64> %x, splat (i64 5000000000)
%sub = add <vscale x 2 x i64> %x, splat (i64 -5000000001)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
index 8cfa237858ac..23c0c826e85e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -648,6 +648,51 @@ define void @masked_store_factor4_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32>
ret void
}
+define <vscale x 2 x i32> @load_factor2_oneactive(ptr %ptr, i32 %evl) {
+; RV32-LABEL: load_factor2_oneactive:
+; RV32: # %bb.0:
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: srli a1, a1, 1
+; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; RV32-NEXT: vlseg2e32.v v7, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: load_factor2_oneactive:
+; RV64: # %bb.0:
+; RV64-NEXT: slli a1, a1, 34
+; RV64-NEXT: srli a1, a1, 33
+; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; RV64-NEXT: vlseg2e32.v v7, (a0)
+; RV64-NEXT: ret
+ %rvl = mul nuw i32 %evl, 4
+ %wide.masked.load = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %rvl)
+ %deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %wide.masked.load)
+ %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
+ ret <vscale x 2 x i32> %t0
+}
+
+define <vscale x 2 x i32> @load_factor5_oneactive(ptr %ptr, i32 %evl) {
+; RV32-LABEL: load_factor5_oneactive:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; RV32-NEXT: vlseg5e32.v v5, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: load_factor5_oneactive:
+; RV64: # %bb.0:
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: srli a1, a1, 32
+; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; RV64-NEXT: vlseg5e32.v v5, (a0)
+; RV64-NEXT: ret
+ %rvl = mul nuw i32 %evl, 5
+ %wide.masked.load = call <vscale x 10 x i32> @llvm.vp.load(ptr %ptr, <vscale x 10 x i1> splat (i1 true), i32 %rvl)
+ %deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave5(<vscale x 10 x i32> %wide.masked.load)
+ %t3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 3
+ ret <vscale x 2 x i32> %t3
+}
+
+
; Negative tests
define {<vscale x 2 x i32>, <vscale x 2 x i32>} @not_same_mask(<vscale x 2 x i1> %mask0, <vscale x 2 x i1> %mask1, ptr %ptr, i32 %evl) {
diff --git a/llvm/test/TableGen/directive1.td b/llvm/test/TableGen/directive1.td
index 1d2bd51204e4..3eda077eeabf 100644
--- a/llvm/test/TableGen/directive1.td
+++ b/llvm/test/TableGen/directive1.td
@@ -53,6 +53,7 @@ def TDL_DirA : Directive<[Spelling<"dira">]> {
// CHECK-EMPTY:
// CHECK-NEXT: #include "llvm/ADT/ArrayRef.h"
// CHECK-NEXT: #include "llvm/ADT/BitmaskEnum.h"
+// CHECK-NEXT: #include "llvm/ADT/Sequence.h"
// CHECK-NEXT: #include "llvm/ADT/StringRef.h"
// CHECK-NEXT: #include "llvm/Frontend/Directive/Spelling.h"
// CHECK-NEXT: #include "llvm/Support/Compiler.h"
@@ -66,22 +67,26 @@ def TDL_DirA : Directive<[Spelling<"dira">]> {
// CHECK-EMPTY:
// CHECK-NEXT: enum class Association {
// CHECK-NEXT: Block,
+// CHECK-NEXT: First_ = Block,
// CHECK-NEXT: Declaration,
// CHECK-NEXT: Delimited,
// CHECK-NEXT: Loop,
// CHECK-NEXT: None,
// CHECK-NEXT: Separating,
+// CHECK-NEXT: Last_ = Separating,
// CHECK-NEXT: };
// CHECK-EMPTY:
// CHECK-NEXT: static constexpr std::size_t Association_enumSize = 6;
// CHECK-EMPTY:
// CHECK-NEXT: enum class Category {
// CHECK-NEXT: Declarative,
+// CHECK-NEXT: First_ = Declarative,
// CHECK-NEXT: Executable,
// CHECK-NEXT: Informational,
// CHECK-NEXT: Meta,
// CHECK-NEXT: Subsidiary,
// CHECK-NEXT: Utility,
+// CHECK-NEXT: Last_ = Utility,
// CHECK-NEXT: };
// CHECK-EMPTY:
// CHECK-NEXT: static constexpr std::size_t Category_enumSize = 6;
@@ -96,6 +101,8 @@ def TDL_DirA : Directive<[Spelling<"dira">]> {
// CHECK-EMPTY:
// CHECK-NEXT: enum class Directive {
// CHECK-NEXT: TDLD_dira,
+// CHECK-NEXT: First_ = TDLD_dira,
+// CHECK-NEXT: Last_ = TDLD_dira,
// CHECK-NEXT: };
// CHECK-EMPTY:
// CHECK-NEXT: static constexpr std::size_t Directive_enumSize = 1;
@@ -104,8 +111,10 @@ def TDL_DirA : Directive<[Spelling<"dira">]> {
// CHECK-EMPTY:
// CHECK-NEXT: enum class Clause {
// CHECK-NEXT: TDLC_clausea,
+// CHECK-NEXT: First_ = TDLC_clausea,
// CHECK-NEXT: TDLC_clauseb,
// CHECK-NEXT: TDLC_clausec,
+// CHECK-NEXT: Last_ = TDLC_clausec,
// CHECK-NEXT: };
// CHECK-EMPTY:
// CHECK-NEXT: static constexpr std::size_t Clause_enumSize = 3;
@@ -151,6 +160,22 @@ def TDL_DirA : Directive<[Spelling<"dira">]> {
// CHECK-NEXT: LLVM_ABI StringRef getTdlAKindName(AKind x);
// CHECK-EMPTY:
// CHECK-NEXT: } // namespace tdl
+// CHECK-EMPTY:
+// CHECK-NEXT: template <> struct enum_iteration_traits<tdl::Association> {
+// CHECK-NEXT: static constexpr bool is_iterable = true;
+// CHECK-NEXT: };
+// CHECK-EMPTY:
+// CHECK-NEXT: template <> struct enum_iteration_traits<tdl::Category> {
+// CHECK-NEXT: static constexpr bool is_iterable = true;
+// CHECK-NEXT: };
+// CHECK-EMPTY:
+// CHECK-NEXT: template <> struct enum_iteration_traits<tdl::Directive> {
+// CHECK-NEXT: static constexpr bool is_iterable = true;
+// CHECK-NEXT: };
+// CHECK-EMPTY:
+// CHECK-NEXT: template <> struct enum_iteration_traits<tdl::Clause> {
+// CHECK-NEXT: static constexpr bool is_iterable = true;
+// CHECK-NEXT: };
// CHECK-NEXT: } // namespace llvm
// CHECK-NEXT: #endif // LLVM_Tdl_INC
diff --git a/llvm/test/TableGen/directive2.td b/llvm/test/TableGen/directive2.td
index 3a64bb3900a3..a25197c3efd9 100644
--- a/llvm/test/TableGen/directive2.td
+++ b/llvm/test/TableGen/directive2.td
@@ -46,6 +46,7 @@ def TDL_DirA : Directive<[Spelling<"dira">]> {
// CHECK-NEXT: #define LLVM_Tdl_INC
// CHECK-EMPTY:
// CHECK-NEXT: #include "llvm/ADT/ArrayRef.h"
+// CHECK-NEXT: #include "llvm/ADT/Sequence.h"
// CHECK-NEXT: #include "llvm/ADT/StringRef.h"
// CHECK-NEXT: #include "llvm/Frontend/Directive/Spelling.h"
// CHECK-NEXT: #include "llvm/Support/Compiler.h"
@@ -57,22 +58,26 @@ def TDL_DirA : Directive<[Spelling<"dira">]> {
// CHECK-EMPTY:
// CHECK-NEXT: enum class Association {
// CHECK-NEXT: Block,
+// CHECK-NEXT: First_ = Block,
// CHECK-NEXT: Declaration,
// CHECK-NEXT: Delimited,
// CHECK-NEXT: Loop,
// CHECK-NEXT: None,
// CHECK-NEXT: Separating,
+// CHECK-NEXT: Last_ = Separating,
// CHECK-NEXT: };
// CHECK-EMPTY:
// CHECK-NEXT: static constexpr std::size_t Association_enumSize = 6;
// CHECK-EMPTY:
// CHECK-NEXT: enum class Category {
// CHECK-NEXT: Declarative,
+// CHECK-NEXT: First_ = Declarative,
// CHECK-NEXT: Executable,
// CHECK-NEXT: Informational,
// CHECK-NEXT: Meta,
// CHECK-NEXT: Subsidiary,
// CHECK-NEXT: Utility,
+// CHECK-NEXT: Last_ = Utility,
// CHECK-NEXT: };
// CHECK-EMPTY:
// CHECK-NEXT: static constexpr std::size_t Category_enumSize = 6;
@@ -87,15 +92,19 @@ def TDL_DirA : Directive<[Spelling<"dira">]> {
// CHECK-EMPTY:
// CHECK-NEXT: enum class Directive {
// CHECK-NEXT: TDLD_dira,
+// CHECK-NEXT: First_ = TDLD_dira,
+// CHECK-NEXT: Last_ = TDLD_dira,
// CHECK-NEXT: };
// CHECK-EMPTY:
// CHECK-NEXT: static constexpr std::size_t Directive_enumSize = 1;
// CHECK-EMPTY:
// CHECK-NEXT: enum class Clause {
// CHECK-NEXT: TDLC_clausea,
+// CHECK-NEXT: First_ = TDLC_clausea,
// CHECK-NEXT: TDLC_clauseb,
// CHECK-NEXT: TDLC_clausec,
// CHECK-NEXT: TDLC_claused,
+// CHECK-NEXT: Last_ = TDLC_claused,
// CHECK-NEXT: };
// CHECK-EMPTY:
// CHECK-NEXT: static constexpr std::size_t Clause_enumSize = 4;
@@ -124,6 +133,22 @@ def TDL_DirA : Directive<[Spelling<"dira">]> {
// CHECK-NEXT: LLVM_ABI Category getDirectiveCategory(Directive D);
// CHECK-NEXT: LLVM_ABI SourceLanguage getDirectiveLanguages(Directive D);
// CHECK-NEXT: } // namespace tdl
+// CHECK-EMPTY:
+// CHECK-NEXT: template <> struct enum_iteration_traits<tdl::Association> {
+// CHECK-NEXT: static constexpr bool is_iterable = true;
+// CHECK-NEXT: };
+// CHECK-EMPTY:
+// CHECK-NEXT: template <> struct enum_iteration_traits<tdl::Category> {
+// CHECK-NEXT: static constexpr bool is_iterable = true;
+// CHECK-NEXT: };
+// CHECK-EMPTY:
+// CHECK-NEXT: template <> struct enum_iteration_traits<tdl::Directive> {
+// CHECK-NEXT: static constexpr bool is_iterable = true;
+// CHECK-NEXT: };
+// CHECK-EMPTY:
+// CHECK-NEXT: template <> struct enum_iteration_traits<tdl::Clause> {
+// CHECK-NEXT: static constexpr bool is_iterable = true;
+// CHECK-NEXT: };
// CHECK-NEXT: } // namespace llvm
// CHECK-NEXT: #endif // LLVM_Tdl_INC
diff --git a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll
index 2747895f06a7..ce4270dc4b7f 100644
--- a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll
+++ b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll
@@ -18,11 +18,9 @@ define void @reuse_lcssa_phi_for_add_rec1(ptr %head) {
; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1
; CHECK-NEXT: br i1 [[EC_1]], label %[[PH:.*]], label %[[LOOP_1]]
; CHECK: [[PH]]:
-; CHECK-NEXT: [[IV_2_LCSSA:%.*]] = phi i32 [ [[IV_2]], %[[LOOP_1]] ]
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LOOP_1]] ]
-; CHECK-NEXT: [[IV_2_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_2_NEXT]], %[[LOOP_1]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[IV_2_NEXT]], %[[LOOP_1]] ]
; CHECK-NEXT: [[SRC_2:%.*]] = tail call noalias noundef dereferenceable_or_null(8) ptr @calloc(i64 1, i64 8)
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[IV_2_LCSSA]], 1
; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP0]], i32 1)
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
diff --git a/llvm/unittests/Frontend/OpenMPDirectiveNameParserTest.cpp b/llvm/unittests/Frontend/OpenMPDirectiveNameParserTest.cpp
index 0363a08cc0f0..10329820bef7 100644
--- a/llvm/unittests/Frontend/OpenMPDirectiveNameParserTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPDirectiveNameParserTest.cpp
@@ -48,12 +48,6 @@ static std::string &prepareParamName(std::string &Name) {
return Name;
}
-namespace llvm {
-template <> struct enum_iteration_traits<omp::Directive> {
- static constexpr bool is_iterable = true;
-};
-} // namespace llvm
-
// Test tokenizing.
class Tokenize : public testing::TestWithParam<omp::Directive> {};
@@ -87,12 +81,10 @@ getParamName1(const testing::TestParamInfo<Tokenize::ParamType> &Info) {
return prepareParamName(Name);
}
-INSTANTIATE_TEST_SUITE_P(
- DirectiveNameParserTest, Tokenize,
- testing::ValuesIn(
- llvm::enum_seq(static_cast<omp::Directive>(0),
- static_cast<omp::Directive>(omp::Directive_enumSize))),
- getParamName1);
+INSTANTIATE_TEST_SUITE_P(DirectiveNameParserTest, Tokenize,
+ testing::ValuesIn(llvm::enum_seq_inclusive(
+ omp::Directive::First_, omp::Directive::Last_)),
+ getParamName1);
// Test parsing of valid names.
@@ -131,9 +123,8 @@ getParamName2(const testing::TestParamInfo<ParseValid::ParamType> &Info) {
INSTANTIATE_TEST_SUITE_P(
DirectiveNameParserTest, ParseValid,
- testing::Combine(testing::ValuesIn(llvm::enum_seq(
- static_cast<omp::Directive>(0),
- static_cast<omp::Directive>(omp::Directive_enumSize))),
+ testing::Combine(testing::ValuesIn(llvm::enum_seq_inclusive(
+ omp::Directive::First_, omp::Directive::Last_)),
testing::ValuesIn(omp::getOpenMPVersions())),
getParamName2);
diff --git a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
index 177eecebce9a..f0e23690367d 100644
--- a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
@@ -106,8 +106,16 @@ static void generateEnumClass(ArrayRef<const Record *> Records, raw_ostream &OS,
bool ExportEnums) {
OS << "\n";
OS << "enum class " << Enum << " {\n";
- for (const Record *R : Records) {
- OS << " " << getIdentifierName(R, Prefix) << ",\n";
+ if (!Records.empty()) {
+ std::string N;
+ for (auto [I, R] : llvm::enumerate(Records)) {
+ N = getIdentifierName(R, Prefix);
+ OS << " " << N << ",\n";
+ // Make the sentinel names less likely to conflict with actual names...
+ if (I == 0)
+ OS << " First_ = " << N << ",\n";
+ }
+ OS << " Last_ = " << N << ",\n";
}
OS << "};\n";
OS << "\n";
@@ -282,6 +290,7 @@ static void emitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) {
if (DirLang.hasEnableBitmaskEnumInNamespace())
OS << "#include \"llvm/ADT/BitmaskEnum.h\"\n";
+ OS << "#include \"llvm/ADT/Sequence.h\"\n";
OS << "#include \"llvm/ADT/StringRef.h\"\n";
OS << "#include \"llvm/Frontend/Directive/Spelling.h\"\n";
OS << "#include \"llvm/Support/Compiler.h\"\n";
@@ -375,6 +384,15 @@ static void emitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) {
for (auto Ns : reverse(Namespaces))
OS << "} // namespace " << Ns << "\n";
+ // These specializations need to be in ::llvm.
+ for (StringRef Enum : {"Association", "Category", "Directive", "Clause"}) {
+ OS << "\n";
+ OS << "template <> struct enum_iteration_traits<"
+ << DirLang.getCppNamespace() << "::" << Enum << "> {\n";
+ OS << " static constexpr bool is_iterable = true;\n";
+ OS << "};\n";
+ }
+
OS << "} // namespace llvm\n";
OS << "#endif // LLVM_" << Lang << "_INC\n";
diff --git a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h
index b68262f09f48..ee401cca8f55 100644
--- a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h
+++ b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h
@@ -707,6 +707,19 @@ public:
/// this for uniformity with `applyDomain`.
void applyRange(const IntegerRelation &rel);
+ /// Returns the range product of `this` (R1) and `rel` (R2) as a new relation
+ /// built from a copy of R1. Requires R1 and R2 to have the same domain.
+ ///
+ /// Let R3 be the rangeProduct of R1 and R2. Then x R3 (y, z) iff
+ /// (x R1 y and x R2 z).
+ ///
+ /// Example:
+ ///
+ /// R1: (i, j) -> k : f(i, j, k) = 0
+ /// R2: (i, j) -> l : g(i, j, l) = 0
+ /// R1.rangeProduct(R2): (i, j) -> (k, l) : f(i, j, k) = 0 and g(i, j, l) = 0
+ IntegerRelation rangeProduct(const IntegerRelation &rel);
+
/// Given a relation `other: (A -> B)`, this operation merges the symbol and
/// local variables and then takes the composition of `other` on `this: (B ->
/// C)`. The resulting relation represents tuples of the form: `A -> C`.
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
index 4eb666239d4e..8f87235fcd23 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
@@ -29,6 +29,7 @@
#include "mlir/Interfaces/ControlFlowInterfaces.h"
#include "mlir/Interfaces/LoopLikeInterface.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
+#include <variant>
#define GET_TYPEDEF_CLASSES
#include "mlir/Dialect/OpenACC/OpenACCOpsTypes.h.inc"
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index 66378f116784..96b9adcc53b3 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -2772,8 +2772,10 @@ def OpenACC_RoutineOp : OpenACC_Op<"routine", [IsolatedFromAbove]> {
}];
let arguments = (ins SymbolNameAttr:$sym_name, SymbolRefAttr:$func_name,
- OptionalAttr<StrArrayAttr>:$bindName,
- OptionalAttr<DeviceTypeArrayAttr>:$bindNameDeviceType,
+ OptionalAttr<SymbolRefArrayAttr>:$bindIdName,
+ OptionalAttr<StrArrayAttr>:$bindStrName,
+ OptionalAttr<DeviceTypeArrayAttr>:$bindIdNameDeviceType,
+ OptionalAttr<DeviceTypeArrayAttr>:$bindStrNameDeviceType,
OptionalAttr<DeviceTypeArrayAttr>:$worker,
OptionalAttr<DeviceTypeArrayAttr>:$vector,
OptionalAttr<DeviceTypeArrayAttr>:$seq, UnitAttr:$nohost,
@@ -2815,14 +2817,14 @@ def OpenACC_RoutineOp : OpenACC_Op<"routine", [IsolatedFromAbove]> {
std::optional<int64_t> getGangDimValue();
std::optional<int64_t> getGangDimValue(mlir::acc::DeviceType deviceType);
- std::optional<llvm::StringRef> getBindNameValue();
- std::optional<llvm::StringRef> getBindNameValue(mlir::acc::DeviceType deviceType);
+ std::optional<::std::variant<mlir::SymbolRefAttr, mlir::StringAttr>> getBindNameValue();
+ std::optional<::std::variant<mlir::SymbolRefAttr, mlir::StringAttr>> getBindNameValue(mlir::acc::DeviceType deviceType);
}];
let assemblyFormat = [{
$sym_name `func` `(` $func_name `)`
oilist (
- `bind` `(` custom<BindName>($bindName, $bindNameDeviceType) `)`
+ `bind` `(` custom<BindName>($bindIdName, $bindStrName ,$bindIdNameDeviceType, $bindStrNameDeviceType) `)`
| `gang` `` custom<RoutineGangClause>($gang, $gangDim, $gangDimDeviceType)
| `worker` custom<DeviceTypeArrayAttr>($worker)
| `vector` custom<DeviceTypeArrayAttr>($vector)
diff --git a/mlir/include/mlir/IR/PatternMatch.h b/mlir/include/mlir/IR/PatternMatch.h
index afeb784b85a1..3a2dbd136b43 100644
--- a/mlir/include/mlir/IR/PatternMatch.h
+++ b/mlir/include/mlir/IR/PatternMatch.h
@@ -475,6 +475,25 @@ public:
RewriterBase::Listener *rewriteListener;
};
+ /// A forwarding listener that logs notification events to llvm::dbgs()
+ /// before passing them on to the wrapped base listener.
+ struct PatternLoggingListener : public RewriterBase::ForwardingListener {
+ PatternLoggingListener(OpBuilder::Listener *listener, StringRef patternName)
+ : RewriterBase::ForwardingListener(listener), patternName(patternName) {
+ }
+ // Overridden notification hooks; only declared here, defined out of line.
+ void notifyOperationInserted(Operation *op, InsertPoint previous) override;
+ void notifyOperationModified(Operation *op) override;
+ void notifyOperationReplaced(Operation *op, Operation *newOp) override;
+ void notifyOperationReplaced(Operation *op,
+ ValueRange replacement) override;
+ void notifyOperationErased(Operation *op) override;
+ void notifyPatternBegin(const Pattern &pattern, Operation *op) override;
+ // The name is held as a StringRef (not copied): it must outlive the listener.
+ private:
+ StringRef patternName;
+ };
+
/// Move the blocks that belong to "region" before the given position in
/// another region "parent". The two regions must be different. The caller
/// is responsible for creating or updating the operation transferring flow
diff --git a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp
index 17e48e0d069b..5c4d4d13580a 100644
--- a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp
+++ b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp
@@ -2481,6 +2481,44 @@ void IntegerRelation::applyDomain(const IntegerRelation &rel) {
void IntegerRelation::applyRange(const IntegerRelation &rel) { compose(rel); }
+IntegerRelation IntegerRelation::rangeProduct(const IntegerRelation &rel) {
+ /// R1: (i, j) -> k : f(i, j, k) = 0
+ /// R2: (i, j) -> l : g(i, j, l) = 0
+ /// R1.rangeProduct(R2): (i, j) -> (k, l) : f(i, j, k) = 0 and g(i, j, l) = 0
+ assert(getNumDomainVars() == rel.getNumDomainVars() &&
+ "Range product is only defined for relations with equal domains");
+
+ // Build the product in an explicit copy of `this`; `this` is only read here.
+ IntegerRelation result = *this;
+ unsigned relRangeVarStart = rel.getVarKindOffset(VarKind::Range);
+ unsigned numThisRangeVars = getNumRangeVars();
+ unsigned numNewSymbolVars = result.getNumSymbolVars() - getNumSymbolVars();
+
+ result.appendVar(VarKind::Range, rel.getNumRangeVars());
+
+ // Copy each equality from `rel`, inserting zero coefficients at the start of
+ // `rel`'s range columns so its range variables line up after those of `this`
+ // in `result`. NOTE(review): `result` is an unmodified copy of `this` when
+ // numNewSymbolVars is computed, so it looks like it is always 0 - confirm.
+ for (unsigned i = 0; i < rel.getNumEqualities(); ++i) {
+ SmallVector<DynamicAPInt> copy =
+ SmallVector<DynamicAPInt>(rel.getEquality(i));
+ copy.insert(copy.begin() + relRangeVarStart,
+ numThisRangeVars + numNewSymbolVars, DynamicAPInt(0));
+ result.addEquality(copy);
+ }
+ // Apply the same column shift to each inequality of `rel`.
+ for (unsigned i = 0; i < rel.getNumInequalities(); ++i) {
+ SmallVector<DynamicAPInt> copy =
+ SmallVector<DynamicAPInt>(rel.getInequality(i));
+ copy.insert(copy.begin() + relRangeVarStart,
+ numThisRangeVars + numNewSymbolVars, DynamicAPInt(0));
+ result.addInequality(copy);
+ }
+
+ return result;
+}
+
void IntegerRelation::printSpace(raw_ostream &os) const {
space.print(os);
os << getNumConstraints() << " constraints\n";
diff --git a/mlir/lib/Bindings/Python/IRCore.cpp b/mlir/lib/Bindings/Python/IRCore.cpp
index d96148288530..7b790e90e0d8 100644
--- a/mlir/lib/Bindings/Python/IRCore.cpp
+++ b/mlir/lib/Bindings/Python/IRCore.cpp
@@ -97,6 +97,10 @@ Args:
binary: Whether to write bytes (True) or str (False). Defaults to False.
large_elements_limit: Whether to elide elements attributes above this
number of elements. Defaults to None (no limit).
+ large_resource_limit: Whether to elide resource attributes above this
+ number of characters. Defaults to None (no limit). If large_elements_limit
+ is set and this is None, the behavior will be to use large_elements_limit
+ as large_resource_limit.
enable_debug_info: Whether to print debug/location information. Defaults
to False.
pretty_debug_info: Whether to format debug information for easier reading
@@ -1303,6 +1307,7 @@ void PyOperation::checkValid() const {
}
void PyOperationBase::print(std::optional<int64_t> largeElementsLimit,
+ std::optional<int64_t> largeResourceLimit,
bool enableDebugInfo, bool prettyDebugInfo,
bool printGenericOpForm, bool useLocalScope,
bool useNameLocAsPrefix, bool assumeVerified,
@@ -1314,10 +1319,10 @@ void PyOperationBase::print(std::optional<int64_t> largeElementsLimit,
fileObject = nb::module_::import_("sys").attr("stdout");
MlirOpPrintingFlags flags = mlirOpPrintingFlagsCreate();
- if (largeElementsLimit) {
+ if (largeElementsLimit)
mlirOpPrintingFlagsElideLargeElementsAttrs(flags, *largeElementsLimit);
- mlirOpPrintingFlagsElideLargeResourceString(flags, *largeElementsLimit);
- }
+ if (largeResourceLimit)
+ mlirOpPrintingFlagsElideLargeResourceString(flags, *largeResourceLimit);
if (enableDebugInfo)
mlirOpPrintingFlagsEnableDebugInfo(flags, /*enable=*/true,
/*prettyForm=*/prettyDebugInfo);
@@ -1405,6 +1410,7 @@ void PyOperationBase::walk(
nb::object PyOperationBase::getAsm(bool binary,
std::optional<int64_t> largeElementsLimit,
+ std::optional<int64_t> largeResourceLimit,
bool enableDebugInfo, bool prettyDebugInfo,
bool printGenericOpForm, bool useLocalScope,
bool useNameLocAsPrefix, bool assumeVerified,
@@ -1416,6 +1422,7 @@ nb::object PyOperationBase::getAsm(bool binary,
fileObject = nb::module_::import_("io").attr("StringIO")();
}
print(/*largeElementsLimit=*/largeElementsLimit,
+ /*largeResourceLimit=*/largeResourceLimit,
/*enableDebugInfo=*/enableDebugInfo,
/*prettyDebugInfo=*/prettyDebugInfo,
/*printGenericOpForm=*/printGenericOpForm,
@@ -3348,6 +3355,7 @@ void mlir::python::populateIRCore(nb::module_ &m) {
[](PyOperationBase &self) {
return self.getAsm(/*binary=*/false,
/*largeElementsLimit=*/std::nullopt,
+ /*largeResourceLimit=*/std::nullopt,
/*enableDebugInfo=*/false,
/*prettyDebugInfo=*/false,
/*printGenericOpForm=*/false,
@@ -3363,11 +3371,12 @@ void mlir::python::populateIRCore(nb::module_ &m) {
nb::arg("state"), nb::arg("file").none() = nb::none(),
nb::arg("binary") = false, kOperationPrintStateDocstring)
.def("print",
- nb::overload_cast<std::optional<int64_t>, bool, bool, bool, bool,
- bool, bool, nb::object, bool, bool>(
- &PyOperationBase::print),
+ nb::overload_cast<std::optional<int64_t>, std::optional<int64_t>,
+ bool, bool, bool, bool, bool, bool, nb::object,
+ bool, bool>(&PyOperationBase::print),
// Careful: Lots of arguments must match up with print method.
nb::arg("large_elements_limit").none() = nb::none(),
+ nb::arg("large_resource_limit").none() = nb::none(),
nb::arg("enable_debug_info") = false,
nb::arg("pretty_debug_info") = false,
nb::arg("print_generic_op_form") = false,
@@ -3383,6 +3392,7 @@ void mlir::python::populateIRCore(nb::module_ &m) {
// Careful: Lots of arguments must match up with get_asm method.
nb::arg("binary") = false,
nb::arg("large_elements_limit").none() = nb::none(),
+ nb::arg("large_resource_limit").none() = nb::none(),
nb::arg("enable_debug_info") = false,
nb::arg("pretty_debug_info") = false,
nb::arg("print_generic_op_form") = false,
diff --git a/mlir/lib/Bindings/Python/IRModule.h b/mlir/lib/Bindings/Python/IRModule.h
index 9befcce725bb..0fdd2d1a7eff 100644
--- a/mlir/lib/Bindings/Python/IRModule.h
+++ b/mlir/lib/Bindings/Python/IRModule.h
@@ -599,18 +599,18 @@ class PyOperationBase {
public:
virtual ~PyOperationBase() = default;
/// Implements the bound 'print' method and helps with others.
- void print(std::optional<int64_t> largeElementsLimit, bool enableDebugInfo,
+ void print(std::optional<int64_t> largeElementsLimit,
+ std::optional<int64_t> largeResourceLimit, bool enableDebugInfo,
bool prettyDebugInfo, bool printGenericOpForm, bool useLocalScope,
bool useNameLocAsPrefix, bool assumeVerified,
nanobind::object fileObject, bool binary, bool skipRegions);
void print(PyAsmState &state, nanobind::object fileObject, bool binary);
- nanobind::object getAsm(bool binary,
- std::optional<int64_t> largeElementsLimit,
- bool enableDebugInfo, bool prettyDebugInfo,
- bool printGenericOpForm, bool useLocalScope,
- bool useNameLocAsPrefix, bool assumeVerified,
- bool skipRegions);
+ nanobind::object
+ getAsm(bool binary, std::optional<int64_t> largeElementsLimit,
+ std::optional<int64_t> largeResourceLimit, bool enableDebugInfo,
+ bool prettyDebugInfo, bool printGenericOpForm, bool useLocalScope,
+ bool useNameLocAsPrefix, bool assumeVerified, bool skipRegions);
// Implement the bound 'writeBytecode' method.
void writeBytecode(const nanobind::object &fileObject,
diff --git a/mlir/lib/Bindings/Python/Pass.cpp b/mlir/lib/Bindings/Python/Pass.cpp
index 8d84864b9db4..20017e25b69b 100644
--- a/mlir/lib/Bindings/Python/Pass.cpp
+++ b/mlir/lib/Bindings/Python/Pass.cpp
@@ -78,12 +78,19 @@ void mlir::python::populatePassManagerSubmodule(nb::module_ &m) {
[](PyPassManager &passManager, bool printBeforeAll,
bool printAfterAll, bool printModuleScope, bool printAfterChange,
bool printAfterFailure, std::optional<int64_t> largeElementsLimit,
- bool enableDebugInfo, bool printGenericOpForm,
+ std::optional<int64_t> largeResourceLimit, bool enableDebugInfo,
+ bool printGenericOpForm,
std::optional<std::string> optionalTreePrintingPath) {
MlirOpPrintingFlags flags = mlirOpPrintingFlagsCreate();
- if (largeElementsLimit)
+ if (largeElementsLimit) {
mlirOpPrintingFlagsElideLargeElementsAttrs(flags,
*largeElementsLimit);
+ mlirOpPrintingFlagsElideLargeResourceString(flags,
+ *largeElementsLimit);
+ }
+ if (largeResourceLimit)
+ mlirOpPrintingFlagsElideLargeResourceString(flags,
+ *largeResourceLimit);
if (enableDebugInfo)
mlirOpPrintingFlagsEnableDebugInfo(flags, /*enable=*/true,
/*prettyForm=*/false);
@@ -103,6 +110,7 @@ void mlir::python::populatePassManagerSubmodule(nb::module_ &m) {
"print_module_scope"_a = false, "print_after_change"_a = false,
"print_after_failure"_a = false,
"large_elements_limit"_a.none() = nb::none(),
+ "large_resource_limit"_a.none() = nb::none(),
"enable_debug_info"_a = false, "print_generic_op_form"_a = false,
"tree_printing_dir_path"_a.none() = nb::none(),
"Enable IR printing, default as mlir-print-ir-after-all.")
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index f2eab62b286a..fbc1f003ab64 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/LogicalResult.h"
+#include <variant>
using namespace mlir;
using namespace acc;
@@ -3461,40 +3462,88 @@ LogicalResult acc::RoutineOp::verify() {
return success();
}
-static ParseResult parseBindName(OpAsmParser &parser, mlir::ArrayAttr &bindName,
- mlir::ArrayAttr &deviceTypes) {
- llvm::SmallVector<mlir::Attribute> bindNameAttrs;
- llvm::SmallVector<mlir::Attribute> deviceTypeAttrs;
+/// Parses a comma-separated list of bind names, each optionally followed by
+/// `[device-type]`. Symbol-reference names and string names are collected
+/// into separate arrays, each with its own parallel device-type array; an
+/// entry without `[...]` gets DeviceType::None.
+static ParseResult parseBindName(OpAsmParser &parser,
+ mlir::ArrayAttr &bindIdName,
+ mlir::ArrayAttr &bindStrName,
+ mlir::ArrayAttr &deviceIdTypes,
+ mlir::ArrayAttr &deviceStrTypes) {
+ llvm::SmallVector<mlir::Attribute> bindIdNameAttrs;
+ llvm::SmallVector<mlir::Attribute> bindStrNameAttrs;
+ llvm::SmallVector<mlir::Attribute> deviceIdTypeAttrs;
+ llvm::SmallVector<mlir::Attribute> deviceStrTypeAttrs;
if (failed(parser.parseCommaSeparatedList([&]() {
- if (parser.parseAttribute(bindNameAttrs.emplace_back()))
+ mlir::Attribute newAttr;
+ auto nameLoc = parser.getCurrentLocation();
+ // Check the parse result before inspecting the attribute: after a
+ // failed parse `newAttr` may still be null and must not be cast.
+ if (parser.parseAttribute(newAttr))
return failure();
+ // Only symbol references and strings are valid bind names. Rejecting
+ // anything else keeps the name and device-type arrays the same length.
+ const bool isSymbolRefAttr = isa<mlir::SymbolRefAttr>(newAttr);
+ if (!isSymbolRefAttr && !isa<mlir::StringAttr>(newAttr)) {
+ parser.emitError(
+ nameLoc,
+ "expected a symbol reference or string attribute as bind name");
+ return failure();
+ }
+ (isSymbolRefAttr ? bindIdNameAttrs : bindStrNameAttrs)
+ .push_back(newAttr);
+ // The device type is recorded in the list that parallels the name kind.
+ llvm::SmallVector<mlir::Attribute> &nameDeviceTypeAttrs =
+ isSymbolRefAttr ? deviceIdTypeAttrs : deviceStrTypeAttrs;
if (failed(parser.parseOptionalLSquare())) {
- deviceTypeAttrs.push_back(mlir::acc::DeviceTypeAttr::get(
- parser.getContext(), mlir::acc::DeviceType::None));
+ nameDeviceTypeAttrs.push_back(mlir::acc::DeviceTypeAttr::get(
+ parser.getContext(), mlir::acc::DeviceType::None));
} else {
- if (parser.parseAttribute(deviceTypeAttrs.emplace_back()) ||
- parser.parseRSquare())
- return failure();
+ if (parser.parseAttribute(nameDeviceTypeAttrs.emplace_back()) ||
+ parser.parseRSquare())
+ return failure();
}
return success();
})))
return failure();
-bindName = ArrayAttr::get(parser.getContext(), bindNameAttrs);
-deviceTypes = ArrayAttr::get(parser.getContext(), deviceTypeAttrs);
+ bindIdName = ArrayAttr::get(parser.getContext(), bindIdNameAttrs);
+ bindStrName = ArrayAttr::get(parser.getContext(), bindStrNameAttrs);
+ deviceIdTypes = ArrayAttr::get(parser.getContext(), deviceIdTypeAttrs);
+ deviceStrTypes = ArrayAttr::get(parser.getContext(), deviceStrTypeAttrs);
return success();
}
+/// Prints the routine's bind names as one comma-separated list, each name
+/// followed by its device type (via printSingleDeviceType). Symbol-reference
+/// names are emitted first, then string names.
static void printBindName(mlir::OpAsmPrinter &p, mlir::Operation *op,
- std::optional<mlir::ArrayAttr> bindName,
- std::optional<mlir::ArrayAttr> deviceTypes) {
- llvm::interleaveComma(llvm::zip(*bindName, *deviceTypes), p,
- [&](const auto &pair) {
- p << std::get<0>(pair);
- printSingleDeviceType(p, std::get<1>(pair));
- });
+ std::optional<mlir::ArrayAttr> bindIdName,
+ std::optional<mlir::ArrayAttr> bindStrName,
+ std::optional<mlir::ArrayAttr> deviceIdTypes,
+ std::optional<mlir::ArrayAttr> deviceStrTypes) {
+ // Flattened (name, device type) sequences; symbol entries come first.
+ llvm::SmallVector<mlir::Attribute> names;
+ llvm::SmallVector<mlir::Attribute> types;
+
+ // Copies one name list together with its parallel device-type list, but
+ // only when that device-type list is populated.
+ auto collect = [&](std::optional<mlir::ArrayAttr> bindNames,
+ std::optional<mlir::ArrayAttr> deviceTypes) {
+ if (!hasDeviceTypeValues(deviceTypes))
+ return;
+ names.append(bindNames->begin(), bindNames->end());
+ types.append(deviceTypes->begin(), deviceTypes->end());
+ };
+ collect(bindIdName, deviceIdTypes);
+ collect(bindStrName, deviceStrTypes);
+
+ // Nothing to print when neither list contributed an entry.
+ if (names.empty())
+ return;
+ llvm::interleaveComma(llvm::zip(names, types), p, [&](const auto &entry) {
+ p << std::get<0>(entry);
+ printSingleDeviceType(p, std::get<1>(entry));
+ });
}
static ParseResult parseRoutineGangClause(OpAsmParser &parser,
@@ -3654,19 +3703,32 @@ bool RoutineOp::hasSeq(mlir::acc::DeviceType deviceType) {
return hasDeviceType(getSeq(), deviceType);
}
-std::optional<llvm::StringRef> RoutineOp::getBindNameValue() {
+/// Convenience overload: looks up the bind name for DeviceType::None.
+std::optional<std::variant<mlir::SymbolRefAttr, mlir::StringAttr>>
+RoutineOp::getBindNameValue() {
return getBindNameValue(mlir::acc::DeviceType::None);
}
-std::optional<llvm::StringRef>
+/// Returns the bind name registered for `deviceType`: a SymbolRefAttr if the
+/// symbol bind-name list has an entry for it, otherwise a StringAttr if the
+/// string bind-name list has one, otherwise std::nullopt.
+std::optional<std::variant<mlir::SymbolRefAttr, mlir::StringAttr>>
RoutineOp::getBindNameValue(mlir::acc::DeviceType deviceType) {
- if (!hasDeviceTypeValues(getBindNameDeviceType()))
+ if (!hasDeviceTypeValues(getBindIdNameDeviceType()) &&
+ !hasDeviceTypeValues(getBindStrNameDeviceType())) {
return std::nullopt;
- if (auto pos = findSegment(*getBindNameDeviceType(), deviceType)) {
- auto attr = (*getBindName())[*pos];
+ }
+
+ // Each list is guarded on its own: the check above only rules out "both
+ // empty", so one of the two optionals may still be unset and must not be
+ // dereferenced.
+ if (hasDeviceTypeValues(getBindIdNameDeviceType())) {
+ if (auto pos = findSegment(*getBindIdNameDeviceType(), deviceType)) {
+ auto attr = (*getBindIdName())[*pos];
+ auto symbolRefAttr = dyn_cast<mlir::SymbolRefAttr>(attr);
+ assert(symbolRefAttr && "expected SymbolRef");
+ return symbolRefAttr;
+ }
+ }
+
+ if (hasDeviceTypeValues(getBindStrNameDeviceType())) {
+ if (auto pos = findSegment(*getBindStrNameDeviceType(), deviceType)) {
+ auto attr = (*getBindStrName())[*pos];
auto stringAttr = dyn_cast<mlir::StringAttr>(attr);
- return stringAttr.getValue();
+ assert(stringAttr && "expected String");
+ return stringAttr;
}
+ }
+
return std::nullopt;
}
diff --git a/mlir/lib/IR/CMakeLists.txt b/mlir/lib/IR/CMakeLists.txt
index 4cabac185171..3ef69cea18f0 100644
--- a/mlir/lib/IR/CMakeLists.txt
+++ b/mlir/lib/IR/CMakeLists.txt
@@ -29,6 +29,7 @@ add_mlir_library(MLIRIR
ODSSupport.cpp
Operation.cpp
OperationSupport.cpp
+ PatternLoggingListener.cpp
PatternMatch.cpp
Region.cpp
RegionKindInterface.cpp
diff --git a/mlir/lib/IR/PatternLoggingListener.cpp b/mlir/lib/IR/PatternLoggingListener.cpp
new file mode 100644
index 000000000000..ce2123ae1a19
--- /dev/null
+++ b/mlir/lib/IR/PatternLoggingListener.cpp
@@ -0,0 +1,50 @@
+#include "mlir/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "pattern-logging-listener"
+#define DBGS() (llvm::dbgs() << "[" << DEBUG_TYPE << "] ")
+#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
+
+using namespace mlir;
+
+/// Logs "<patternName> | notifyOperationInserted | <op name>" via LLVM_DEBUG
+/// (DEBUG_TYPE "pattern-logging-listener"), then forwards the notification
+/// to ForwardingListener.
+void RewriterBase::PatternLoggingListener::notifyOperationInserted(
+ Operation *op, InsertPoint previous) {
+ LDBG(patternName << " | notifyOperationInserted"
+ << " | " << op->getName());
+ ForwardingListener::notifyOperationInserted(op, previous);
+}
+
+/// Logs "<patternName> | notifyOperationModified | <op name>" via LLVM_DEBUG,
+/// then forwards the notification to ForwardingListener.
+void RewriterBase::PatternLoggingListener::notifyOperationModified(
+ Operation *op) {
+ LDBG(patternName << " | notifyOperationModified"
+ << " | " << op->getName());
+ ForwardingListener::notifyOperationModified(op);
+}
+
+/// Op-with-op replacement: logs the names of both the replaced op and the
+/// new op via LLVM_DEBUG, then forwards the notification to
+/// ForwardingListener.
+void RewriterBase::PatternLoggingListener::notifyOperationReplaced(
+ Operation *op, Operation *newOp) {
+ LDBG(patternName << " | notifyOperationReplaced (with op)"
+ << " | " << op->getName() << " | " << newOp->getName());
+ ForwardingListener::notifyOperationReplaced(op, newOp);
+}
+
+/// Op-with-values replacement: logs only the replaced op's name (the
+/// replacement values are not printed), then forwards the notification to
+/// ForwardingListener.
+void RewriterBase::PatternLoggingListener::notifyOperationReplaced(
+ Operation *op, ValueRange replacement) {
+ LDBG(patternName << " | notifyOperationReplaced (with values)"
+ << " | " << op->getName());
+ ForwardingListener::notifyOperationReplaced(op, replacement);
+}
+
+/// Logs "<patternName> | notifyOperationErased | <op name>" via LLVM_DEBUG,
+/// then forwards the notification to ForwardingListener.
+void RewriterBase::PatternLoggingListener::notifyOperationErased(
+ Operation *op) {
+ LDBG(patternName << " | notifyOperationErased"
+ << " | " << op->getName());
+ ForwardingListener::notifyOperationErased(op);
+}
+
+/// Logs "<patternName> | notifyPatternBegin | <op name>" via LLVM_DEBUG, then
+/// forwards the notification to ForwardingListener. The logged name is this
+/// listener's own `patternName`, not one read from the `pattern` argument.
+void RewriterBase::PatternLoggingListener::notifyPatternBegin(
+ const Pattern &pattern, Operation *op) {
+ LDBG(patternName << " | notifyPatternBegin"
+ << " | " << op->getName());
+ ForwardingListener::notifyPatternBegin(pattern, op);
+}
diff --git a/mlir/lib/Rewrite/PatternApplicator.cpp b/mlir/lib/Rewrite/PatternApplicator.cpp
index 4a12183492fd..b2b372b7b124 100644
--- a/mlir/lib/Rewrite/PatternApplicator.cpp
+++ b/mlir/lib/Rewrite/PatternApplicator.cpp
@@ -15,6 +15,10 @@
#include "ByteCode.h"
#include "llvm/Support/Debug.h"
+#ifndef NDEBUG
+#include "llvm/ADT/ScopeExit.h"
+#endif
+
#define DEBUG_TYPE "pattern-application"
using namespace mlir;
@@ -206,11 +210,19 @@ LogicalResult PatternApplicator::matchAndRewrite(
} else {
LLVM_DEBUG(llvm::dbgs() << "Trying to match \""
<< bestPattern->getDebugName() << "\"\n");
-
const auto *pattern =
static_cast<const RewritePattern *>(bestPattern);
- result = pattern->matchAndRewrite(op, rewriter);
+#ifndef NDEBUG
+ OpBuilder::Listener *oldListener = rewriter.getListener();
+ auto loggingListener =
+ std::make_unique<RewriterBase::PatternLoggingListener>(
+ oldListener, pattern->getDebugName());
+ rewriter.setListener(loggingListener.get());
+ auto resetListenerCallback = llvm::make_scope_exit(
+ [&] { rewriter.setListener(oldListener); });
+#endif
+ result = pattern->matchAndRewrite(op, rewriter);
LLVM_DEBUG(llvm::dbgs()
<< "\"" << bestPattern->getDebugName() << "\" result "
<< succeeded(result) << "\n");
diff --git a/mlir/python/mlir/_mlir_libs/_mlir/ir.pyi b/mlir/python/mlir/_mlir_libs/_mlir/ir.pyi
index ed476da28d6b..be71737e4b5b 100644
--- a/mlir/python/mlir/_mlir_libs/_mlir/ir.pyi
+++ b/mlir/python/mlir/_mlir_libs/_mlir/ir.pyi
@@ -200,6 +200,7 @@ class _OperationBase:
def get_asm(
binary: Literal[True],
large_elements_limit: int | None = None,
+ large_resource_limit: int | None = None,
enable_debug_info: bool = False,
pretty_debug_info: bool = False,
print_generic_op_form: bool = False,
@@ -212,6 +213,7 @@ class _OperationBase:
self,
binary: bool = False,
large_elements_limit: int | None = None,
+ large_resource_limit: int | None = None,
enable_debug_info: bool = False,
pretty_debug_info: bool = False,
print_generic_op_form: bool = False,
@@ -253,6 +255,7 @@ class _OperationBase:
def print(
self,
large_elements_limit: int | None = None,
+ large_resource_limit: int | None = None,
enable_debug_info: bool = False,
pretty_debug_info: bool = False,
print_generic_op_form: bool = False,
@@ -270,6 +273,10 @@ class _OperationBase:
binary: Whether to write bytes (True) or str (False). Defaults to False.
large_elements_limit: Whether to elide elements attributes above this
number of elements. Defaults to None (no limit).
+ large_resource_limit: Whether to elide resource strings above this
+ number of characters. Defaults to None (no limit). If large_elements_limit
+ is set and this is None, the behavior will be to use large_elements_limit
+ as large_resource_limit.
enable_debug_info: Whether to print debug/location information. Defaults
to False.
pretty_debug_info: Whether to format debug information for easier reading
diff --git a/mlir/python/mlir/_mlir_libs/_mlir/passmanager.pyi b/mlir/python/mlir/_mlir_libs/_mlir/passmanager.pyi
index 0d2eaffe16d3..1010daddae2a 100644
--- a/mlir/python/mlir/_mlir_libs/_mlir/passmanager.pyi
+++ b/mlir/python/mlir/_mlir_libs/_mlir/passmanager.pyi
@@ -23,6 +23,7 @@ class PassManager:
print_after_change: bool = False,
print_after_failure: bool = False,
large_elements_limit: int | None = None,
+ large_resource_limit: int | None = None,
enable_debug_info: bool = False,
print_generic_op_form: bool = False,
tree_printing_dir_path: str | None = None,
diff --git a/mlir/test/IR/test-pattern-logging-listener.mlir b/mlir/test/IR/test-pattern-logging-listener.mlir
new file mode 100644
index 000000000000..a1d27741a072
--- /dev/null
+++ b/mlir/test/IR/test-pattern-logging-listener.mlir
@@ -0,0 +1,17 @@
+// RUN: mlir-opt %s --test-walk-pattern-rewrite-driver \
+// RUN: --allow-unregistered-dialect --debug-only=pattern-logging-listener 2>&1 | FileCheck %s
+
+// Check that when replacing an op with a new op, we get appropriate
+// pattern-logging lines. The regex is because the anonymous namespace is
+// printed differently on different platforms.
+
+// CHECK: [pattern-logging-listener] {{.anonymous.namespace.}}::ReplaceWithNewOp | notifyOperationInserted | test.new_op
+// CHECK: [pattern-logging-listener] {{.anonymous.namespace.}}::ReplaceWithNewOp | notifyOperationReplaced (with values) | test.replace_with_new_op
+// CHECK: [pattern-logging-listener] {{.anonymous.namespace.}}::ReplaceWithNewOp | notifyOperationModified | arith.addi
+// CHECK: [pattern-logging-listener] {{.anonymous.namespace.}}::ReplaceWithNewOp | notifyOperationModified | arith.addi
+// CHECK: [pattern-logging-listener] {{.anonymous.namespace.}}::ReplaceWithNewOp | notifyOperationErased | test.replace_with_new_op
+// Input: the result of "test.replace_with_new_op" feeds both operands of
+// arith.addi - presumably why two notifyOperationModified lines for
+// arith.addi are expected above.
+func.func @replace_with_new_op() -> i32 {
+ %a = "test.replace_with_new_op"() : () -> (i32)
+ %res = arith.addi %a, %a : i32
+ return %res : i32
+}
diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py
index 9b5cadd62bef..233fef8ec429 100644
--- a/mlir/test/lit.cfg.py
+++ b/mlir/test/lit.cfg.py
@@ -301,6 +301,17 @@ if "MLIR_OPT_CHECK_IR_ROUNDTRIP" in os.environ:
ToolSubst("mlir-opt", "mlir-opt --verify-roundtrip", unresolved="fatal"),
]
)
+elif "MLIR_GENERATE_PATTERN_CATALOG" in os.environ:
+ tools.extend(
+ [
+ ToolSubst(
+ "mlir-opt",
+ "mlir-opt --debug-only=pattern-logging-listener --mlir-disable-threading",
+ unresolved="fatal",
+ ),
+ ToolSubst("FileCheck", "FileCheck --dump-input=always", unresolved="fatal"),
+ ]
+ )
else:
tools.extend(["mlir-opt"])
diff --git a/mlir/test/python/ir/operation.py b/mlir/test/python/ir/operation.py
index b08fe98397fb..ede1571f940f 100644
--- a/mlir/test/python/ir/operation.py
+++ b/mlir/test/python/ir/operation.py
@@ -686,6 +686,15 @@ def testOperationPrint():
skip_regions=True,
)
+ # Test print with large_resource_limit.
+ # CHECK: func.func @f1(%arg0: i32) -> i32
+ # CHECK-NOT: resource1: "0x08
+ module.operation.print(large_resource_limit=2)
+
+ # Test large_elements_limit has no effect on resource string
+ # CHECK: func.func @f1(%arg0: i32) -> i32
+ # CHECK: resource1: "0x08
+ module.operation.print(large_elements_limit=2)
# CHECK-LABEL: TEST: testKnownOpView
@run
diff --git a/mlir/test/python/pass_manager.py b/mlir/test/python/pass_manager.py
index 85d2eb304882..e26d42bb3291 100644
--- a/mlir/test/python/pass_manager.py
+++ b/mlir/test/python/pass_manager.py
@@ -363,6 +363,63 @@ def testPrintIrLargeLimitElements():
pm.run(module)
+# CHECK-LABEL: TEST: testPrintIrLargeResourceLimit
+@run
+def testPrintIrLargeResourceLimit():
+ """Test that large_resource_limit elides a large resource blob in printed IR."""
+ with Context() as ctx:
+ module = ModuleOp.parse(
+ """
+ module {
+ func.func @main() -> tensor<3xi64> {
+ %0 = arith.constant dense_resource<blob1> : tensor<3xi64>
+ return %0 : tensor<3xi64>
+ }
+ }
+ {-#
+ dialect_resources: {
+ builtin: {
+ blob1: "0x010000000000000002000000000000000300000000000000"
+ }
+ }
+ #-}
+ """
+ )
+ pm = PassManager.parse("builtin.module(canonicalize)")
+ ctx.enable_multithreading(False)
+ pm.enable_ir_printing(large_resource_limit=4)
+ # CHECK-NOT: blob1: "0x01
+ pm.run(module)
+
+
+# CHECK-LABEL: TEST: testPrintIrLargeResourceLimitVsElementsLimit
+@run
+def testPrintIrLargeResourceLimitVsElementsLimit():
+ """Test that large_elements_limit alone also elides large resource strings."""
+ with Context() as ctx:
+ module = ModuleOp.parse(
+ """
+ module {
+ func.func @main() -> tensor<3xi64> {
+ %0 = arith.constant dense_resource<blob1> : tensor<3xi64>
+ return %0 : tensor<3xi64>
+ }
+ }
+ {-#
+ dialect_resources: {
+ builtin: {
+ blob1: "0x010000000000000002000000000000000300000000000000"
+ }
+ }
+ #-}
+ """
+ )
+ pm = PassManager.parse("builtin.module(canonicalize)")
+ ctx.enable_multithreading(False)
+ pm.enable_ir_printing(large_elements_limit=1)
+ # CHECK-NOT: blob1: "0x01
+ pm.run(module)
+
+
# CHECK-LABEL: TEST: testPrintIrTree
@run
def testPrintIrTree():
diff --git a/mlir/unittests/Analysis/Presburger/IntegerRelationTest.cpp b/mlir/unittests/Analysis/Presburger/IntegerRelationTest.cpp
index 7df500bc9568..dd0b09f7f05d 100644
--- a/mlir/unittests/Analysis/Presburger/IntegerRelationTest.cpp
+++ b/mlir/unittests/Analysis/Presburger/IntegerRelationTest.cpp
@@ -608,3 +608,97 @@ TEST(IntegerRelationTest, convertVarKindToLocal) {
EXPECT_EQ(space.getId(VarKind::Symbol, 0), Identifier(&identifiers[3]));
EXPECT_EQ(space.getId(VarKind::Symbol, 1), Identifier(&identifiers[4]));
}
+
+// Two relations over the same two leading variables (i, j), with one extra
+// variable each (k, l): the expected product is over (i, j, k, l) and carries
+// the constraints of both inputs. (The integer argument of
+// parseRelationFromSet is presumably the number of domain variables.)
+TEST(IntegerRelationTest, rangeProduct) {
+ IntegerRelation r1 = parseRelationFromSet(
+ "(i, j, k) : (2*i + 3*k == 0, i >= 0, j >= 0, k >= 0)", 2);
+ IntegerRelation r2 = parseRelationFromSet(
+ "(i, j, l) : (4*i + 6*j + 9*l == 0, i >= 0, j >= 0, l >= 0)", 2);
+
+ IntegerRelation rangeProd = r1.rangeProduct(r2);
+ IntegerRelation expected =
+ parseRelationFromSet("(i, j, k, l) : (2*i + 3*k == 0, 4*i + 6*j + 9*l == "
+ "0, i >= 0, j >= 0, k >= 0, l >= 0)",
+ 2);
+
+ EXPECT_TRUE(expected.isEqual(rangeProd));
+}
+
+// The second operand contributes two trailing variables (l, m): the expected
+// product is over (i, k, l, m), with the first operand's k placed first.
+TEST(IntegerRelationTest, rangeProductMultdimRange) {
+ IntegerRelation r1 =
+ parseRelationFromSet("(i, k) : (2*i + 3*k == 0, i >= 0, k >= 0)", 1);
+ IntegerRelation r2 = parseRelationFromSet(
+ "(i, l, m) : (4*i + 6*m + 9*l == 0, i >= 0, l >= 0, m >= 0)", 1);
+
+ IntegerRelation rangeProd = r1.rangeProduct(r2);
+ IntegerRelation expected =
+ parseRelationFromSet("(i, k, l, m) : (2*i + 3*k == 0, 4*i + 6*m + 9*l == "
+ "0, i >= 0, k >= 0, l >= 0, m >= 0)",
+ 1);
+
+ EXPECT_TRUE(expected.isEqual(rangeProd));
+}
+
+// Same operands as rangeProductMultdimRange but in the opposite order: the
+// expected product is over (i, l, m, k), i.e. the receiver's trailing
+// variables come before the argument's.
+TEST(IntegerRelationTest, rangeProductMultdimRangeSwapped) {
+ IntegerRelation r1 = parseRelationFromSet(
+ "(i, l, m) : (4*i + 6*m + 9*l == 0, i >= 0, l >= 0, m >= 0)", 1);
+ IntegerRelation r2 =
+ parseRelationFromSet("(i, k) : (2*i + 3*k == 0, i >= 0, k >= 0)", 1);
+
+ IntegerRelation rangeProd = r1.rangeProduct(r2);
+ IntegerRelation expected =
+ parseRelationFromSet("(i, l, m, k) : (2*i + 3*k == 0, 4*i + 6*m + 9*l == "
+ "0, i >= 0, k >= 0, l >= 0, m >= 0)",
+ 1);
+
+ EXPECT_TRUE(expected.isEqual(rangeProd));
+}
+
+// Both operands are parsed with 0 as the count argument: the expected product
+// is over all four variables (i, j, k, l) with both constraint sets.
+TEST(IntegerRelationTest, rangeProductEmptyDomain) {
+ IntegerRelation r1 =
+ parseRelationFromSet("(i, j) : (4*i + 9*j == 0, i >= 0, j >= 0)", 0);
+ IntegerRelation r2 =
+ parseRelationFromSet("(k, l) : (2*k + 3*l == 0, k >= 0, l >= 0)", 0);
+ IntegerRelation rangeProd = r1.rangeProduct(r2);
+ IntegerRelation expected =
+ parseRelationFromSet("(i, j, k, l) : (2*k + 3*l == 0, 4*i + 9*j == "
+ "0, i >= 0, j >= 0, k >= 0, l >= 0)",
+ 0);
+ EXPECT_TRUE(expected.isEqual(rangeProd));
+}
+
+// Both operands are parsed with a count of 2 over two variables, leaving no
+// trailing variables: the expected product is over the same (i, j) and
+// carries the constraints of both inputs.
+TEST(IntegerRelationTest, rangeProductEmptyRange) {
+ IntegerRelation r1 =
+ parseRelationFromSet("(i, j) : (4*i + 9*j == 0, i >= 0, j >= 0)", 2);
+ IntegerRelation r2 =
+ parseRelationFromSet("(i, j) : (2*i + 3*j == 0, i >= 0, j >= 0)", 2);
+ IntegerRelation rangeProd = r1.rangeProduct(r2);
+ IntegerRelation expected =
+ parseRelationFromSet("(i, j) : (2*i + 3*j == 0, 4*i + 9*j == "
+ "0, i >= 0, j >= 0)",
+ 2);
+ EXPECT_TRUE(expected.isEqual(rangeProd));
+}
+
+// Degenerate case: operands with no variables and no constraints; the
+// product is expected to equal the same empty relation.
+TEST(IntegerRelationTest, rangeProductEmptyDomainAndRange) {
+ IntegerRelation r1 = parseRelationFromSet("() : ()", 0);
+ IntegerRelation r2 = parseRelationFromSet("() : ()", 0);
+ IntegerRelation rangeProd = r1.rangeProduct(r2);
+ IntegerRelation expected = parseRelationFromSet("() : ()", 0);
+ EXPECT_TRUE(expected.isEqual(rangeProd));
+}
+
+// Both operands use the same symbol s: the expected product is over
+// (i, j, l)[s], with a single s appearing in both equalities.
+TEST(IntegerRelationTest, rangeProductSymbols) {
+ IntegerRelation r1 = parseRelationFromSet(
+ "(i, j)[s] : (2*i + 3*j + s == 0, i >= 0, j >= 0)", 1);
+ IntegerRelation r2 = parseRelationFromSet(
+ "(i, l)[s] : (3*i + 4*l + s == 0, i >= 0, l >= 0)", 1);
+
+ IntegerRelation rangeProd = r1.rangeProduct(r2);
+ IntegerRelation expected = parseRelationFromSet(
+ "(i, j, l)[s] : (2*i + 3*j + s == 0, 3*i + 4*l + s == "
+ "0, i >= 0, j >= 0, l >= 0)",
+ 1);
+
+ EXPECT_TRUE(expected.isEqual(rangeProd));
+}
diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp
index aa16421cbec5..836efdb307f9 100644
--- a/mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp
+++ b/mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp
@@ -519,14 +519,44 @@ TEST_F(OpenACCOpsTest, routineOpTest) {
op->removeGangDimDeviceTypeAttr();
op->removeGangDimAttr();
- op->setBindNameDeviceTypeAttr(b.getArrayAttr({dtypeNone}));
- op->setBindNameAttr(b.getArrayAttr({b.getStringAttr("fname")}));
+ op->setBindIdNameDeviceTypeAttr(
+ b.getArrayAttr({DeviceTypeAttr::get(&context, DeviceType::Host)}));
+ op->setBindStrNameDeviceTypeAttr(b.getArrayAttr({dtypeNone}));
+ op->setBindIdNameAttr(
+ b.getArrayAttr({SymbolRefAttr::get(&context, "test_symbol")}));
+ op->setBindStrNameAttr(b.getArrayAttr({b.getStringAttr("fname")}));
EXPECT_TRUE(op->getBindNameValue().has_value());
- EXPECT_EQ(op->getBindNameValue().value(), "fname");
- for (auto d : dtypesWithoutNone)
- EXPECT_FALSE(op->getBindNameValue(d).has_value());
- op->removeBindNameDeviceTypeAttr();
- op->removeBindNameAttr();
+ EXPECT_TRUE(op->getBindNameValue(DeviceType::Host).has_value());
+ EXPECT_EQ(std::visit(
+ [](const auto &attr) -> std::string {
+ if constexpr (std::is_same_v<std::decay_t<decltype(attr)>,
+ mlir::StringAttr>) {
+ return attr.str();
+ } else {
+ return attr.getLeafReference().str();
+ }
+ },
+ op->getBindNameValue().value()),
+ "fname");
+ EXPECT_EQ(std::visit(
+ [](const auto &attr) -> std::string {
+ if constexpr (std::is_same_v<std::decay_t<decltype(attr)>,
+ mlir::StringAttr>) {
+ return attr.str();
+ } else {
+ return attr.getLeafReference().str();
+ }
+ },
+ op->getBindNameValue(DeviceType::Host).value()),
+ "test_symbol");
+ for (auto d : dtypesWithoutNone) {
+ if (d != DeviceType::Host)
+ EXPECT_FALSE(op->getBindNameValue(d).has_value());
+ }
+ op->removeBindIdNameDeviceTypeAttr();
+ op->removeBindStrNameDeviceTypeAttr();
+ op->removeBindIdNameAttr();
+ op->removeBindStrNameAttr();
}
template <typename Op>