diff options
| author | Aiden Grossman <aidengrossman@google.com> | 2025-04-14 07:38:01 +0000 |
|---|---|---|
| committer | Aiden Grossman <aidengrossman@google.com> | 2025-04-14 07:38:01 +0000 |
| commit | 0b43a0423bbaa22384d522050a295eb564116d95 (patch) | |
| tree | 18b4111bcb0563e9f7279666299318617dacc017 | |
| parent | b7163d6a2490a7517cb8a526e36a877a4fd7bede (diff) | |
| parent | 97bc9137e545423334b00d60ab64855ccc434c3a (diff) | |
[𝘀𝗽𝗿] changes introduced through rebase
users/boomanaiden154/main.githubci-upload-ninja_log-as-an-artifact
Created using spr 1.3.4
[skip ci]
223 files changed, 4484 insertions, 1996 deletions
diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index 6f10afe4a562..a1e9da41b4b3 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -210,6 +210,9 @@ if (CLANGD_ENABLE_REMOTE) include(AddGRPC) endif() +option(CLANGD_BUILD_DEXP "Build the dexp tool as part of Clangd" ON) +llvm_canonicalize_cmake_booleans(CLANGD_BUILD_DEXP) + if(CLANG_INCLUDE_TESTS) add_subdirectory(test) add_subdirectory(unittests) @@ -220,4 +223,7 @@ option(CLANGD_ENABLE_REMOTE "Use gRPC library to enable remote index support for set(GRPC_INSTALL_PATH "" CACHE PATH "Path to gRPC library manual installation.") add_subdirectory(index/remote) -add_subdirectory(index/dex/dexp) + +if(CLANGD_BUILD_DEXP) + add_subdirectory(index/dex/dexp) +endif() diff --git a/clang-tools-extra/clangd/test/CMakeLists.txt b/clang-tools-extra/clangd/test/CMakeLists.txt index b51f461a4986..42fc3506641f 100644 --- a/clang-tools-extra/clangd/test/CMakeLists.txt +++ b/clang-tools-extra/clangd/test/CMakeLists.txt @@ -3,8 +3,6 @@ set(CLANGD_TEST_DEPS ClangdTests clangd-indexer split-file - # No tests for it, but we should still make sure they build. - dexp ) if(CLANGD_BUILD_XPC) @@ -12,6 +10,11 @@ if(CLANGD_BUILD_XPC) list(APPEND CLANGD_TEST_DEPS ClangdXpcUnitTests) endif() +if(CLANGD_BUILD_DEXP) + # No tests for it, but we should still make sure they build. + list(APPEND CLANGD_TEST_DEPS dexp) +endif() + if(CLANGD_ENABLE_REMOTE) list(APPEND CLANGD_TEST_DEPS clangd-index-server clangd-index-server-monitor) endif() diff --git a/clang-tools-extra/clangd/test/lit.site.cfg.py.in b/clang-tools-extra/clangd/test/lit.site.cfg.py.in index 1fe7c8d0f324..a0bb3561e19e 100644 --- a/clang-tools-extra/clangd/test/lit.site.cfg.py.in +++ b/clang-tools-extra/clangd/test/lit.site.cfg.py.in @@ -15,6 +15,7 @@ config.llvm_shlib_dir = "@SHLIBDIR@" config.clangd_source_dir = "@CMAKE_CURRENT_SOURCE_DIR@/.." 
config.clangd_binary_dir = "@CMAKE_CURRENT_BINARY_DIR@/.." config.clangd_build_xpc = @CLANGD_BUILD_XPC@ +config.clangd_build_dexp = @CLANGD_BUILD_DEXP@ config.clangd_enable_remote = @CLANGD_ENABLE_REMOTE@ config.clangd_tidy_checks = @CLANGD_TIDY_CHECKS@ config.have_zlib = @LLVM_ENABLE_ZLIB@ diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 3b8a9cac6587..971ab50cc9a6 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -818,7 +818,23 @@ of different sizes and signs is forbidden in binary and ternary builtins. T __builtin_elementwise_fmod(T x, T y) return The floating-point remainder of (x/y) whose sign floating point types matches the sign of x. T __builtin_elementwise_max(T x, T y) return x or y, whichever is larger integer and floating point types + For floating point types, follows semantics of maxNum + in IEEE 754-2008. See `LangRef + <http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_ + for the comparison. T __builtin_elementwise_min(T x, T y) return x or y, whichever is smaller integer and floating point types + For floating point types, follows semantics of minNum + in IEEE 754-2008. See `LangRef + <http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_ + for the comparison. + T __builtin_elementwise_maxnum(T x, T y) return x or y, whichever is larger. Follows IEEE 754-2008 floating point types + semantics (maxNum) with +0.0>-0.0. See `LangRef + <http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_ + for the comparison. + T __builtin_elementwise_minnum(T x, T y) return x or y, whichever is smaller. Follows IEEE 754-2008 floating point types + semantics (minNum) with +0.0>-0.0. See `LangRef + <http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_ + for the comparison. 
T __builtin_elementwise_add_sat(T x, T y) return the sum of x and y, clamped to the range of integer types representable values for the signed/unsigned integer type. T __builtin_elementwise_sub_sat(T x, T y) return the difference of x and y, clamped to the range of integer types diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 11f62bc881b0..fd9b9a80e993 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -38,6 +38,9 @@ Potentially Breaking Changes - Fix missing diagnostics for uses of declarations when performing typename access, such as when performing member access on a '[[deprecated]]' type alias. (#GH58547) +- For ARM targets when compiling assembly files, the features included in the selected CPU + or Architecture's FPU are included. If you wish not to use a specific feature, + the relevant ``+no`` option will need to be amended to the command line option. C/C++ Language Potentially Breaking Changes ------------------------------------------- @@ -191,6 +194,7 @@ Non-comprehensive list of changes in this release - Support parsing the `cc` operand modifier and alias it to the `c` modifier (#GH127719). - Added `__builtin_elementwise_exp10`. - For AMDPGU targets, added `__builtin_v_cvt_off_f32_i4` that maps to the `v_cvt_off_f32_i4` instruction. +- Added `__builtin_elementwise_minnum` and `__builtin_elementwise_maxnum`. New Compiler Flags ------------------ @@ -511,6 +515,7 @@ X86 Support Arm and AArch64 Support ^^^^^^^^^^^^^^^^^^^^^^^ +- For ARM targets, cc1as now considers the FPU's features for the selected CPU or Architecture. 
Android Support ^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 868e5b92acdc..74a11257b373 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -1304,6 +1304,18 @@ def ElementwiseMin : Builtin { let Prototype = "void(...)"; } +def ElementwiseMaxNum : Builtin { + let Spellings = ["__builtin_elementwise_maxnum"]; + let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Prototype = "void(...)"; +} + +def ElementwiseMinNum : Builtin { + let Spellings = ["__builtin_elementwise_minnum"]; + let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Prototype = "void(...)"; +} + def ElementwiseMaximum : Builtin { let Spellings = ["__builtin_elementwise_maximum"]; let Attributes = [NoThrow, Const, CustomTypeChecking]; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index c1020b234b13..affc076a876a 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6881,6 +6881,13 @@ let Flags = [TargetSpecific] in { defm android_pad_segment : BooleanFFlag<"android-pad-segment">, Group<f_Group>; } // let Flags = [TargetSpecific] +def shared_libflangrt : Flag<["-"], "shared-libflangrt">, + HelpText<"Link the flang-rt shared library">, Group<Link_Group>, + Visibility<[FlangOption]>, Flags<[NoArgumentUnused]>; +def static_libflangrt : Flag<["-"], "static-libflangrt">, + HelpText<"Link the flang-rt static library">, Group<Link_Group>, + Visibility<[FlangOption]>, Flags<[NoArgumentUnused]>; + //===----------------------------------------------------------------------===// // FLangOption + NoXarchOption //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index 076e4296c309..d0059673d6a6 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ 
b/clang/include/clang/Driver/ToolChain.h @@ -521,6 +521,10 @@ public: addFortranRuntimeLibraryPath(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; + /// Add the path for libflang_rt.runtime.a + void addFlangRTLibPath(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; + const char *getCompilerRTArgString(const llvm::opt::ArgList &Args, StringRef Component, FileType Type = ToolChain::FT_Static, diff --git a/clang/include/clang/Support/RISCVVIntrinsicUtils.h b/clang/include/clang/Support/RISCVVIntrinsicUtils.h index 8f2a4f54a1b7..00a79a0fcb5d 100644 --- a/clang/include/clang/Support/RISCVVIntrinsicUtils.h +++ b/clang/include/clang/Support/RISCVVIntrinsicUtils.h @@ -11,6 +11,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitmaskEnum.h" +#include "llvm/ADT/Bitset.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include <cstdint> @@ -376,6 +377,8 @@ enum PolicyScheme : uint8_t { HasPolicyOperand, }; +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, enum PolicyScheme PS); + // TODO refactor RVVIntrinsic class design after support all intrinsic // combination. This represents an instantiation of an intrinsic with a // particular type and prototype @@ -507,6 +510,23 @@ enum RVVRequire { RVV_REQ_NUM, }; +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, enum RVVRequire Require); + +struct RequiredExtensionBits { + llvm::Bitset<RVV_REQ_NUM> Bits; + RequiredExtensionBits() {} + RequiredExtensionBits(std::initializer_list<RVVRequire> Init) { + for (auto I : Init) + Bits.set(I); + } + + void set(unsigned I) { Bits.set(I); } + bool operator[](unsigned I) const { return Bits[I]; } +}; + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const RequiredExtensionBits &Exts); + // Raw RVV intrinsic info, used to expand later. // This struct is highly compact for minimized code size. 
struct RVVIntrinsicRecord { @@ -518,7 +538,7 @@ struct RVVIntrinsicRecord { const char *OverloadedName; // Required target features for this intrinsic. - uint32_t RequiredExtensions[(RVV_REQ_NUM + 31) / 32]; + RequiredExtensionBits RequiredExtensions; // Prototype for this intrinsic, index of RVVSignatureTable. uint16_t PrototypeIndex; diff --git a/clang/lib/AST/ByteCode/Disasm.cpp b/clang/lib/AST/ByteCode/Disasm.cpp index 12c434029562..d4c9ce6050b8 100644 --- a/clang/lib/AST/ByteCode/Disasm.cpp +++ b/clang/lib/AST/ByteCode/Disasm.cpp @@ -33,39 +33,74 @@ using namespace clang; using namespace clang::interp; -template <typename T> inline static T ReadArg(Program &P, CodePtr &OpPC) { +template <typename T> +inline static std::string printArg(Program &P, CodePtr &OpPC) { if constexpr (std::is_pointer_v<T>) { uint32_t ID = OpPC.read<uint32_t>(); - return reinterpret_cast<T>(P.getNativePointer(ID)); + std::string Result; + llvm::raw_string_ostream SS(Result); + SS << reinterpret_cast<T>(P.getNativePointer(ID)); + return Result; } else { - return OpPC.read<T>(); + std::string Result; + llvm::raw_string_ostream SS(Result); + auto Arg = OpPC.read<T>(); + SS << Arg; + return Result; } } -template <> inline Floating ReadArg<Floating>(Program &P, CodePtr &OpPC) { - Floating F = Floating::deserialize(*OpPC); +template <> inline std::string printArg<Floating>(Program &P, CodePtr &OpPC) { + auto F = Floating::deserialize(*OpPC); OpPC += align(F.bytesToSerialize()); - return F; + + std::string Result; + llvm::raw_string_ostream SS(Result); + SS << F; + return Result; } template <> -inline IntegralAP<false> ReadArg<IntegralAP<false>>(Program &P, CodePtr &OpPC) { - IntegralAP<false> I = IntegralAP<false>::deserialize(*OpPC); - OpPC += align(I.bytesToSerialize()); - return I; -} +inline std::string printArg<IntegralAP<false>>(Program &P, CodePtr &OpPC) { + auto F = IntegralAP<false>::deserialize(*OpPC); + OpPC += align(F.bytesToSerialize()); + std::string Result; + 
llvm::raw_string_ostream SS(Result); + SS << F; + return Result; +} template <> -inline IntegralAP<true> ReadArg<IntegralAP<true>>(Program &P, CodePtr &OpPC) { - IntegralAP<true> I = IntegralAP<true>::deserialize(*OpPC); - OpPC += align(I.bytesToSerialize()); - return I; +inline std::string printArg<IntegralAP<true>>(Program &P, CodePtr &OpPC) { + auto F = IntegralAP<true>::deserialize(*OpPC); + OpPC += align(F.bytesToSerialize()); + + std::string Result; + llvm::raw_string_ostream SS(Result); + SS << F; + return Result; } -template <> inline FixedPoint ReadArg<FixedPoint>(Program &P, CodePtr &OpPC) { - FixedPoint I = FixedPoint::deserialize(*OpPC); - OpPC += align(I.bytesToSerialize()); - return I; +template <> inline std::string printArg<FixedPoint>(Program &P, CodePtr &OpPC) { + auto F = FixedPoint::deserialize(*OpPC); + OpPC += align(F.bytesToSerialize()); + + std::string Result; + llvm::raw_string_ostream SS(Result); + SS << F; + return Result; +} + +static bool isJumpOpcode(Opcode Op) { + return Op == OP_Jmp || Op == OP_Jf || Op == OP_Jt; +} + +static size_t getNumDisplayWidth(size_t N) { + unsigned L = 1u, M = 10u; + while (M <= N && ++L != std::numeric_limits<size_t>::digits10 + 1) + M *= 10u; + + return L; } LLVM_DUMP_METHOD void Function::dump() const { dump(llvm::errs()); } @@ -80,23 +115,115 @@ LLVM_DUMP_METHOD void Function::dump(llvm::raw_ostream &OS) const { OS << "rvo: " << hasRVO() << "\n"; OS << "this arg: " << hasThisPointer() << "\n"; - auto PrintName = [&OS](const char *Name) { - OS << Name; - long N = 30 - strlen(Name); - if (N > 0) - OS.indent(N); + struct OpText { + size_t Addr; + std::string Op; + bool IsJump; + llvm::SmallVector<std::string> Args; }; + auto PrintName = [](const char *Name) -> std::string { + return std::string(Name); + }; + + llvm::SmallVector<OpText> Code; + size_t LongestAddr = 0; + size_t LongestOp = 0; + for (CodePtr Start = getCodeBegin(), PC = Start; PC != getCodeEnd();) { size_t Addr = PC - Start; + OpText Text; 
auto Op = PC.read<Opcode>(); - OS << llvm::format("%8d", Addr) << " "; + Text.Addr = Addr; + Text.IsJump = isJumpOpcode(Op); switch (Op) { #define GET_DISASM #include "Opcodes.inc" #undef GET_DISASM } + Code.push_back(Text); + LongestOp = std::max(Text.Op.size(), LongestOp); + LongestAddr = std::max(getNumDisplayWidth(Addr), LongestAddr); } + + // Record jumps and their targets. + struct JmpData { + size_t From; + size_t To; + }; + llvm::SmallVector<JmpData> Jumps; + for (auto &Text : Code) { + if (Text.IsJump) + Jumps.push_back({Text.Addr, Text.Addr + std::stoi(Text.Args[0]) + + align(sizeof(Opcode)) + + align(sizeof(int32_t))}); + } + + llvm::SmallVector<std::string> Text; + Text.reserve(Code.size()); + size_t LongestLine = 0; + // Print code to a string, one at a time. + for (auto C : Code) { + std::string Line; + llvm::raw_string_ostream LS(Line); + LS << C.Addr; + LS.indent(LongestAddr - getNumDisplayWidth(C.Addr) + 4); + LS << C.Op; + LS.indent(LongestOp - C.Op.size() + 4); + for (auto &Arg : C.Args) { + LS << Arg << ' '; + } + Text.push_back(Line); + LongestLine = std::max(Line.size(), LongestLine); + } + + assert(Code.size() == Text.size()); + + auto spaces = [](unsigned N) -> std::string { + std::string S; + for (unsigned I = 0; I != N; ++I) + S += ' '; + return S; + }; + + // Now, draw the jump lines. 
+ for (auto &J : Jumps) { + if (J.To > J.From) { + bool FoundStart = false; + for (size_t LineIndex = 0; LineIndex != Text.size(); ++LineIndex) { + Text[LineIndex] += spaces(LongestLine - Text[LineIndex].size()); + + if (Code[LineIndex].Addr == J.From) { + Text[LineIndex] += " --+"; + FoundStart = true; + } else if (Code[LineIndex].Addr == J.To) { + Text[LineIndex] += " <-+"; + break; + } else if (FoundStart) { + Text[LineIndex] += " |"; + } + } + LongestLine += 5; + } else { + bool FoundStart = false; + for (ssize_t LineIndex = Text.size() - 1; LineIndex >= 0; --LineIndex) { + Text[LineIndex] += spaces(LongestLine - Text[LineIndex].size()); + if (Code[LineIndex].Addr == J.From) { + Text[LineIndex] += " --+"; + FoundStart = true; + } else if (Code[LineIndex].Addr == J.To) { + Text[LineIndex] += " <-+"; + break; + } else if (FoundStart) { + Text[LineIndex] += " |"; + } + } + LongestLine += 5; + } + } + + for (auto &Line : Text) + OS << Line << '\n'; } LLVM_DUMP_METHOD void Program::dump() const { dump(llvm::errs()); } diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index 0afd772c73b8..3e1f36da8925 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -307,7 +307,7 @@ bool isConstexprUnknown(const Pointer &P) { if (P.isDummy()) return false; const VarDecl *VD = P.block()->getDescriptor()->asVarDecl(); - return VD && VD->hasLocalStorage(); + return VD && VD->hasLocalStorage() && !isa<ParmVarDecl>(VD); } bool CheckBCPResult(InterpState &S, const Pointer &Ptr) { diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index 4e84dcc8d551..b4e15b3ffbe6 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -771,6 +771,11 @@ bool IncDecHelper(InterpState &S, CodePtr OpPC, const Pointer &Ptr, bool CanOverflow) { assert(!Ptr.isDummy()); + if (!S.inConstantContext()) { + if (isConstexprUnknown(Ptr)) + return false; + } + if constexpr (std::is_same_v<T, 
Boolean>) { if (!S.getLangOpts().CPlusPlus14) return Invalid(S, OpPC); diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index fe55dfffc1cb..1e4e055e04af 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3818,6 +3818,22 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Result); } + case Builtin::BI__builtin_elementwise_maxnum: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::maxnum, Op0, + Op1, nullptr, "elt.maxnum"); + return RValue::get(Result); + } + + case Builtin::BI__builtin_elementwise_minnum: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::minnum, Op0, + Op1, nullptr, "elt.minnum"); + return RValue::get(Result); + } + case Builtin::BI__builtin_elementwise_maximum: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index db2a2c574064..bcf039d9f268 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -198,14 +198,10 @@ ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const { /*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device)); } - // FIXME: Should also use this for OpenCL, but it requires addressing the - // problem of kernels being called. - // // FIXME: This doesn't apply the optimization of coercing pointers in structs // to global address space when using byref. This would require implementing a // new kind of coercion of the in-memory type when for indirect arguments. 
- if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy && - isAggregateTypeForABI(Ty)) { + if (LTy == OrigLTy && isAggregateTypeForABI(Ty)) { return ABIArgInfo::getIndirectAliased( getContext().getTypeAlignInChars(Ty), getContext().getTargetAddressSpace(LangAS::opencl_constant), diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 36d0ae34dec8..97317579c8a5 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -744,9 +744,12 @@ std::string ToolChain::buildCompilerRTBasename(const llvm::opt::ArgList &Args, Suffix = IsITANMSVCWindows ? ".lib" : ".a"; break; case ToolChain::FT_Shared: - Suffix = TT.isOSWindows() - ? (TT.isWindowsGNUEnvironment() ? ".dll.a" : ".lib") - : ".so"; + if (TT.isOSWindows()) + Suffix = TT.isWindowsGNUEnvironment() ? ".dll.a" : ".lib"; + else if (TT.isOSAIX()) + Suffix = ".a"; + else + Suffix = ".so"; break; } @@ -816,8 +819,7 @@ void ToolChain::addFortranRuntimeLibs(const ArgList &Args, if (AsNeeded) addAsNeededOption(*this, Args, CmdArgs, /*as_needed=*/false); } - CmdArgs.push_back("-lflang_rt.runtime"); - addArchSpecificRPath(*this, Args, CmdArgs); + addFlangRTLibPath(Args, CmdArgs); // needs libexecinfo for backtrace functions if (getTriple().isOSFreeBSD() || getTriple().isOSNetBSD() || @@ -850,6 +852,20 @@ void ToolChain::addFortranRuntimeLibraryPath(const llvm::opt::ArgList &Args, CmdArgs.push_back(Args.MakeArgString("-L" + DefaultLibPath)); } +void ToolChain::addFlangRTLibPath(const ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const { + // Link static flang_rt.runtime.a or shared flang_rt.runtime.so. + // On AIX, default to static flang-rt. 
+ if (Args.hasFlag(options::OPT_static_libflangrt, + options::OPT_shared_libflangrt, getTriple().isOSAIX())) + CmdArgs.push_back( + getCompilerRTArgString(Args, "runtime", ToolChain::FT_Static, true)); + else { + CmdArgs.push_back("-lflang_rt.runtime"); + addArchSpecificRPath(*this, Args, CmdArgs); + } +} + // Android target triples contain a target version. If we don't have libraries // for the exact target version, we should fall back to the next newest version // or a versionless path, if any. diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp index 26b9d4c772be..5dc80bc5a3d2 100644 --- a/clang/lib/Driver/ToolChains/AIX.cpp +++ b/clang/lib/Driver/ToolChains/AIX.cpp @@ -608,14 +608,6 @@ void AIX::addProfileRTLibs(const llvm::opt::ArgList &Args, ToolChain::addProfileRTLibs(Args, CmdArgs); } -void AIX::addFortranRuntimeLibs(const ArgList &Args, - llvm::opt::ArgStringList &CmdArgs) const { - // Link flang_rt.runtime.a. On AIX, the static and shared library are all - // named .a - CmdArgs.push_back( - getCompilerRTArgString(Args, "runtime", ToolChain::FT_Static, true)); -} - ToolChain::CXXStdlibType AIX::GetDefaultCXXStdlibType() const { return ToolChain::CST_Libcxx; } diff --git a/clang/lib/Driver/ToolChains/AIX.h b/clang/lib/Driver/ToolChains/AIX.h index 17e8370cd121..8f130f6b5454 100644 --- a/clang/lib/Driver/ToolChains/AIX.h +++ b/clang/lib/Driver/ToolChains/AIX.h @@ -87,9 +87,6 @@ public: void addProfileRTLibs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const override; - void addFortranRuntimeLibs(const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CmdArgs) const override; - CXXStdlibType GetDefaultCXXStdlibType() const override; RuntimeLibType GetDefaultRuntimeLibType() const override; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index d1de3b91c352..35ca019795dd 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ 
b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -65,6 +65,10 @@ void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) { FiniteOnly.Off = FilePath; } else if (BaseName == "oclc_finite_only_on") { FiniteOnly.On = FilePath; + } else if (BaseName == "oclc_daz_opt_on") { + DenormalsAreZero.On = FilePath; + } else if (BaseName == "oclc_daz_opt_off") { + DenormalsAreZero.Off = FilePath; } else if (BaseName == "oclc_correctly_rounded_sqrt_on") { CorrectlyRoundedSqrt.On = FilePath; } else if (BaseName == "oclc_correctly_rounded_sqrt_off") { @@ -881,6 +885,10 @@ void ROCMToolChain::addClangTargetOptions( return; bool Wave64 = isWave64(DriverArgs, Kind); + // TODO: There are way too many flags that change this. Do we need to check + // them all? + bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || + getDefaultDenormsAreZeroForTarget(Kind); bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only); bool UnsafeMathOpt = @@ -901,7 +909,7 @@ void ROCMToolChain::addClangTargetOptions( // Add the generic set of libraries. 
BCLibs.append(RocmInstallation->getCommonBitcodeLibs( - DriverArgs, LibDeviceFile, Wave64, FiniteOnly, UnsafeMathOpt, + DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, false)); for (auto [BCFile, Internalize] : BCLibs) { @@ -940,8 +948,9 @@ bool RocmInstallationDetector::checkCommonBitcodeLibs( llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> RocmInstallationDetector::getCommonBitcodeLibs( const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64, - bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath, bool CorrectSqrt, - DeviceLibABIVersion ABIVer, bool GPUSan, bool isOpenMP) const { + bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath, + bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool GPUSan, + bool isOpenMP) const { llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> BCLibs; auto AddBCLib = [&](ToolChain::BitCodeLibraryInfo BCLib, @@ -960,6 +969,7 @@ RocmInstallationDetector::getCommonBitcodeLibs( AddBCLib(getOCKLPath()); else if (GPUSan && isOpenMP) AddBCLib(getOCKLPath(), false); + AddBCLib(getDenormalsAreZeroPath(DAZ)); AddBCLib(getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath)); AddBCLib(getFiniteOnlyPath(FiniteOnly || FastRelaxedMath)); AddBCLib(getCorrectlyRoundedSqrtPath(CorrectSqrt)); @@ -987,6 +997,11 @@ ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, return {}; // If --hip-device-lib is not set, add the default bitcode libraries. + // TODO: There are way too many flags that change this. Do we need to check + // them all? 
+ bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, + options::OPT_fno_gpu_flush_denormals_to_zero, + getDefaultDenormsAreZeroForTarget(Kind)); bool FiniteOnly = DriverArgs.hasFlag( options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false); bool UnsafeMathOpt = @@ -1006,7 +1021,7 @@ ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, getSanitizerArgs(DriverArgs).needsAsanRt(); return RocmInstallation->getCommonBitcodeLibs( - DriverArgs, LibDeviceFile, Wave64, FiniteOnly, UnsafeMathOpt, + DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, isOpenMP); } diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp index e50cb3836f2c..5084058b3fef 100644 --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -679,21 +679,17 @@ llvm::ARM::FPUKind arm::getARMTargetFeatures(const Driver &D, CPUArgFPUKind != llvm::ARM::FK_INVALID ? 
CPUArgFPUKind : ArchArgFPUKind; (void)llvm::ARM::getFPUFeatures(FPUKind, Features); } else { - bool Generic = true; - if (!ForAS) { - std::string CPU = arm::getARMTargetCPU(CPUName, ArchName, Triple); - if (CPU != "generic") - Generic = false; - llvm::ARM::ArchKind ArchKind = - arm::getLLVMArchKindForARM(CPU, ArchName, Triple); - FPUKind = llvm::ARM::getDefaultFPU(CPU, ArchKind); - (void)llvm::ARM::getFPUFeatures(FPUKind, Features); - } + std::string CPU = arm::getARMTargetCPU(CPUName, ArchName, Triple); + bool Generic = CPU == "generic"; if (Generic && (Triple.isOSWindows() || Triple.isOSDarwin()) && getARMSubArchVersionNumber(Triple) >= 7) { FPUKind = llvm::ARM::parseFPU("neon"); - (void)llvm::ARM::getFPUFeatures(FPUKind, Features); + } else { + llvm::ARM::ArchKind ArchKind = + arm::getLLVMArchKindForARM(CPU, ArchName, Triple); + FPUKind = llvm::ARM::getDefaultFPU(CPU, ArchKind); } + (void)llvm::ARM::getFPUFeatures(FPUKind, Features); } // Now we've finished accumulating features from arch, cpu and fpu, diff --git a/clang/lib/Driver/ToolChains/PPCLinux.cpp b/clang/lib/Driver/ToolChains/PPCLinux.cpp index 575e88c6ab12..0ed0f91ad166 100644 --- a/clang/lib/Driver/ToolChains/PPCLinux.cpp +++ b/clang/lib/Driver/ToolChains/PPCLinux.cpp @@ -12,7 +12,6 @@ #include "clang/Driver/Options.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" -#include "llvm/Support/VirtualFileSystem.h" using namespace clang::driver; using namespace clang::driver::toolchains; @@ -102,18 +101,3 @@ bool PPCLinuxToolChain::SupportIEEEFloat128( return GlibcSupportsFloat128((Twine(D.DyldPrefix) + Linker).str()) && !(D.CCCIsCXX() && HasUnsupportedCXXLib); } - -void PPCLinuxToolChain::addFortranRuntimeLibs( - const ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const { - // Link static flang_rt.runtime.a or shared flang_rt.runtime.so - const char *Path; - if (getVFS().exists(Twine(Path = getCompilerRTArgString( - Args, "runtime", ToolChain::FT_Static, true)))) - 
CmdArgs.push_back(Path); - else if (getVFS().exists( - Twine(Path = getCompilerRTArgString( - Args, "runtime", ToolChain::FT_Shared, true)))) - CmdArgs.push_back(Path); - else - CmdArgs.push_back("-lflang_rt.runtime"); -} diff --git a/clang/lib/Driver/ToolChains/PPCLinux.h b/clang/lib/Driver/ToolChains/PPCLinux.h index 910df3d16e6a..63adaff6be9c 100644 --- a/clang/lib/Driver/ToolChains/PPCLinux.h +++ b/clang/lib/Driver/ToolChains/PPCLinux.h @@ -24,9 +24,6 @@ public: AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; - void addFortranRuntimeLibs(const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CmdArgs) const override; - private: bool SupportIEEEFloat128(const Driver &D, const llvm::Triple &Triple, const llvm::opt::ArgList &Args) const; diff --git a/clang/lib/Driver/ToolChains/ROCm.h b/clang/lib/Driver/ToolChains/ROCm.h index f002b386e11c..2a09da011489 100644 --- a/clang/lib/Driver/ToolChains/ROCm.h +++ b/clang/lib/Driver/ToolChains/ROCm.h @@ -137,6 +137,7 @@ private: ConditionalLibrary WavefrontSize64; ConditionalLibrary FiniteOnly; ConditionalLibrary UnsafeMath; + ConditionalLibrary DenormalsAreZero; ConditionalLibrary CorrectlyRoundedSqrt; // Maps ABI version to library path. The version number is in the format of @@ -151,7 +152,8 @@ private: bool allGenericLibsValid() const { return !OCML.empty() && !OCKL.empty() && !OpenCL.empty() && WavefrontSize64.isValid() && FiniteOnly.isValid() && - UnsafeMath.isValid() && CorrectlyRoundedSqrt.isValid(); + UnsafeMath.isValid() && DenormalsAreZero.isValid() && + CorrectlyRoundedSqrt.isValid(); } void scanLibDevicePath(llvm::StringRef Path); @@ -173,12 +175,11 @@ public: /// Get file paths of default bitcode libraries common to AMDGPU based /// toolchains. 
- llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> - getCommonBitcodeLibs(const llvm::opt::ArgList &DriverArgs, - StringRef LibDeviceFile, bool Wave64, bool FiniteOnly, - bool UnsafeMathOpt, bool FastRelaxedMath, - bool CorrectSqrt, DeviceLibABIVersion ABIVer, - bool GPUSan, bool isOpenMP) const; + llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> getCommonBitcodeLibs( + const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, + bool Wave64, bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, + bool FastRelaxedMath, bool CorrectSqrt, DeviceLibABIVersion ABIVer, + bool GPUSan, bool isOpenMP) const; /// Check file paths of default bitcode libraries common to AMDGPU based /// toolchains. \returns false if there are invalid or missing files. bool checkCommonBitcodeLibs(StringRef GPUArch, StringRef LibDeviceFile, @@ -244,6 +245,10 @@ public: return UnsafeMath.get(Enabled); } + StringRef getDenormalsAreZeroPath(bool Enabled) const { + return DenormalsAreZero.get(Enabled); + } + StringRef getCorrectlyRoundedSqrtPath(bool Enabled) const { return CorrectlyRoundedSqrt.get(Enabled); } diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 82dc403538c4..ef5f07e2c62e 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -5569,7 +5569,8 @@ static bool isAllmanBrace(const FormatToken &Tok) { // Returns 'true' if 'Tok' is a function argument. 
static bool IsFunctionArgument(const FormatToken &Tok) { return Tok.MatchingParen && Tok.MatchingParen->Next && - Tok.MatchingParen->Next->isOneOf(tok::comma, tok::r_paren); + Tok.MatchingParen->Next->isOneOf(tok::comma, tok::r_paren, + tok::r_brace); } static bool diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp index b1e43f0313db..cfaadf07edfd 100644 --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -1012,13 +1012,8 @@ void WhitespaceManager::alignConsecutiveDeclarations() { AlignTokens( Style, [&](Change const &C) { - if (Style.AlignConsecutiveDeclarations.AlignFunctionPointers) { - for (const auto *Prev = C.Tok->Previous; Prev; Prev = Prev->Previous) - if (Prev->is(tok::equal)) - return false; - if (C.Tok->is(TT_FunctionTypeLParen)) - return true; - } + if (C.Tok->is(TT_FunctionTypeLParen)) + return Style.AlignConsecutiveDeclarations.AlignFunctionPointers; if (C.Tok->is(TT_FunctionDeclarationName)) return Style.AlignConsecutiveDeclarations.AlignFunctionDeclarations; if (C.Tok->isNot(TT_StartOfName)) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index bffd0dd461d3..13bc2bd14621 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2762,6 +2762,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, // These builtins restrict the element type to floating point // types only, and take in two arguments. 
+ case Builtin::BI__builtin_elementwise_minnum: + case Builtin::BI__builtin_elementwise_maxnum: case Builtin::BI__builtin_elementwise_minimum: case Builtin::BI__builtin_elementwise_maximum: case Builtin::BI__builtin_elementwise_atan2: diff --git a/clang/lib/Sema/SemaRISCV.cpp b/clang/lib/Sema/SemaRISCV.cpp index 746609604d1b..b9f843b1920a 100644 --- a/clang/lib/Sema/SemaRISCV.cpp +++ b/clang/lib/Sema/SemaRISCV.cpp @@ -232,8 +232,7 @@ void RISCVIntrinsicManagerImpl::ConstructRVVIntrinsics( for (auto &Record : Recs) { // Check requirements. if (llvm::any_of(FeatureCheckList, [&](const auto &Item) { - return ((Record.RequiredExtensions[Item.second / 32] & - (1U << (Item.second % 32))) != 0) && + return Record.RequiredExtensions[Item.second] && !TI.hasFeature(Item.first); })) continue; diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 108d7e1dbaeb..5c80077f294c 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -1280,12 +1280,10 @@ void OpenACCDeclClauseInstantiator::VisitDevicePtrClause( const OpenACCDevicePtrClause &C) { llvm::SmallVector<Expr *> VarList = VisitVarList(C.getVarList()); // Ensure each var is a pointer type. 
- VarList.erase(std::remove_if(VarList.begin(), VarList.end(), - [&](Expr *E) { - return SemaRef.OpenACC().CheckVarIsPointerType( - OpenACCClauseKind::DevicePtr, E); - }), - VarList.end()); + llvm::erase_if(VarList, [&](Expr *E) { + return SemaRef.OpenACC().CheckVarIsPointerType(OpenACCClauseKind::DevicePtr, + E); + }); ParsedClause.setVarListDetails(VarList, OpenACCModifierKind::Invalid); if (SemaRef.OpenACC().CheckDeclareClause(ParsedClause, OpenACCModifierKind::Invalid)) diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 21e250e172d5..bb58ec49612c 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -11992,13 +11992,10 @@ void OpenACCClauseTransform<Derived>::VisitDetachClause( llvm::SmallVector<Expr *> VarList = VisitVarList(C.getVarList()); // Ensure each var is a pointer type. - VarList.erase( - std::remove_if(VarList.begin(), VarList.end(), - [&](Expr *E) { - return Self.getSema().OpenACC().CheckVarIsPointerType( - OpenACCClauseKind::Detach, E); - }), - VarList.end()); + llvm::erase_if(VarList, [&](Expr *E) { + return Self.getSema().OpenACC().CheckVarIsPointerType( + OpenACCClauseKind::Detach, E); + }); ParsedClause.setVarListDetails(VarList, OpenACCModifierKind::Invalid); NewClause = OpenACCDetachClause::Create( diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index a48c05061626..95b5718f1d14 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1493,42 +1493,45 @@ void ASTWriter::WriteControlBlock(Preprocessor &PP, StringRef isysroot) { unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); RecordData::value_type Record[] = {MODULE_NAME}; Stream.EmitRecordWithBlob(AbbrevCode, Record, WritingModule->Name); - } - if (WritingModule && WritingModule->Directory) { - SmallString<128> BaseDir; - if (PP.getHeaderSearchInfo().getHeaderSearchOpts().ModuleFileHomeIsCwd) { - // Use the current working directory as 
the base path for all inputs. - auto CWD = FileMgr.getOptionalDirectoryRef("."); - BaseDir.assign(CWD->getName()); - } else { - BaseDir.assign(WritingModule->Directory->getName()); - } - cleanPathForOutput(FileMgr, BaseDir); - - // If the home of the module is the current working directory, then we - // want to pick up the cwd of the build process loading the module, not - // our cwd, when we load this module. - if (!PP.getHeaderSearchInfo().getHeaderSearchOpts().ModuleFileHomeIsCwd && - (!PP.getHeaderSearchInfo() - .getHeaderSearchOpts() - .ModuleMapFileHomeIsCwd || - WritingModule->Directory->getName() != ".")) { - // Module directory. - auto Abbrev = std::make_shared<BitCodeAbbrev>(); - Abbrev->Add(BitCodeAbbrevOp(MODULE_DIRECTORY)); - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Directory - unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); + auto BaseDir = [&]() -> std::optional<SmallString<128>> { + if (PP.getHeaderSearchInfo().getHeaderSearchOpts().ModuleFileHomeIsCwd) { + // Use the current working directory as the base path for all inputs. + auto CWD = FileMgr.getOptionalDirectoryRef("."); + return CWD->getName(); + } + if (WritingModule->Directory) { + return WritingModule->Directory->getName(); + } + return std::nullopt; + }(); + if (BaseDir) { + cleanPathForOutput(FileMgr, *BaseDir); + + // If the home of the module is the current working directory, then we + // want to pick up the cwd of the build process loading the module, not + // our cwd, when we load this module. + if (!PP.getHeaderSearchInfo().getHeaderSearchOpts().ModuleFileHomeIsCwd && + (!PP.getHeaderSearchInfo() + .getHeaderSearchOpts() + .ModuleMapFileHomeIsCwd || + WritingModule->Directory->getName() != ".")) { + // Module directory. 
+ auto Abbrev = std::make_shared<BitCodeAbbrev>(); + Abbrev->Add(BitCodeAbbrevOp(MODULE_DIRECTORY)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Directory + unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); + + RecordData::value_type Record[] = {MODULE_DIRECTORY}; + Stream.EmitRecordWithBlob(AbbrevCode, Record, *BaseDir); + } - RecordData::value_type Record[] = {MODULE_DIRECTORY}; - Stream.EmitRecordWithBlob(AbbrevCode, Record, BaseDir); + // Write out all other paths relative to the base directory if possible. + BaseDirectory.assign(BaseDir->begin(), BaseDir->end()); + } else if (!isysroot.empty()) { + // Write out paths relative to the sysroot if possible. + BaseDirectory = std::string(isysroot); } - - // Write out all other paths relative to the base directory if possible. - BaseDirectory.assign(BaseDir.begin(), BaseDir.end()); - } else if (!isysroot.empty()) { - // Write out paths relative to the sysroot if possible. - BaseDirectory = std::string(isysroot); } // Module map file diff --git a/clang/lib/Support/RISCVVIntrinsicUtils.cpp b/clang/lib/Support/RISCVVIntrinsicUtils.cpp index e44fbb018183..6378596ef31e 100644 --- a/clang/lib/Support/RISCVVIntrinsicUtils.cpp +++ b/clang/lib/Support/RISCVVIntrinsicUtils.cpp @@ -1196,36 +1196,91 @@ SmallVector<PrototypeDescriptor> parsePrototypes(StringRef Prototypes) { return PrototypeDescriptors; } +#define STRINGIFY(NAME) \ + case NAME: \ + OS << #NAME; \ + break; + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, enum PolicyScheme PS) { + switch (PS) { + STRINGIFY(SchemeNone) + STRINGIFY(HasPassthruOperand) + STRINGIFY(HasPolicyOperand) + } + return OS; +} + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, enum RVVRequire Require) { + switch (Require) { + STRINGIFY(RVV_REQ_RV64) + STRINGIFY(RVV_REQ_Zvfhmin) + STRINGIFY(RVV_REQ_Xsfvcp) + STRINGIFY(RVV_REQ_Xsfvfnrclipxfqf) + STRINGIFY(RVV_REQ_Xsfvfwmaccqqq) + STRINGIFY(RVV_REQ_Xsfvqmaccdod) + STRINGIFY(RVV_REQ_Xsfvqmaccqoq) + 
STRINGIFY(RVV_REQ_Zvbb) + STRINGIFY(RVV_REQ_Zvbc) + STRINGIFY(RVV_REQ_Zvkb) + STRINGIFY(RVV_REQ_Zvkg) + STRINGIFY(RVV_REQ_Zvkned) + STRINGIFY(RVV_REQ_Zvknha) + STRINGIFY(RVV_REQ_Zvknhb) + STRINGIFY(RVV_REQ_Zvksed) + STRINGIFY(RVV_REQ_Zvksh) + STRINGIFY(RVV_REQ_Zvfbfwma) + STRINGIFY(RVV_REQ_Zvfbfmin) + STRINGIFY(RVV_REQ_Zvfh) + STRINGIFY(RVV_REQ_Experimental) + default: + llvm_unreachable("Unsupported RVVRequire!"); + break; + } + return OS; +} + +#undef STRINGIFY + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const RequiredExtensionBits &Exts) { + OS << "{"; + ListSeparator LS; + for (unsigned I = 0; I < RVV_REQ_NUM; I++) + if (Exts[I]) + OS << LS << static_cast<RVVRequire>(I); + OS << "}"; + return OS; +} + raw_ostream &operator<<(raw_ostream &OS, const RVVIntrinsicRecord &Record) { OS << "{"; - OS << "\"" << Record.Name << "\","; + OS << "/*Name=*/\"" << Record.Name << "\", "; if (Record.OverloadedName == nullptr || StringRef(Record.OverloadedName).empty()) - OS << "nullptr,"; + OS << "/*OverloadedName=*/nullptr, "; else - OS << "\"" << Record.OverloadedName << "\","; - OS << "{"; - for (uint32_t Exts : Record.RequiredExtensions) - OS << Exts << ','; - OS << "},"; - OS << Record.PrototypeIndex << ","; - OS << Record.SuffixIndex << ","; - OS << Record.OverloadedSuffixIndex << ","; - OS << (int)Record.PrototypeLength << ","; - OS << (int)Record.SuffixLength << ","; - OS << (int)Record.OverloadedSuffixSize << ","; - OS << (int)Record.TypeRangeMask << ","; - OS << (int)Record.Log2LMULMask << ","; - OS << (int)Record.NF << ","; - OS << (int)Record.HasMasked << ","; - OS << (int)Record.HasVL << ","; - OS << (int)Record.HasMaskedOffOperand << ","; - OS << (int)Record.HasTailPolicy << ","; - OS << (int)Record.HasMaskPolicy << ","; - OS << (int)Record.HasFRMRoundModeOp << ","; - OS << (int)Record.IsTuple << ","; - OS << (int)Record.UnMaskedPolicyScheme << ","; - OS << (int)Record.MaskedPolicyScheme << ","; + OS << "/*OverloadedName=*/\"" << 
Record.OverloadedName << "\", "; + OS << "/*RequiredExtensions=*/" << Record.RequiredExtensions << ", "; + OS << "/*PrototypeIndex=*/" << Record.PrototypeIndex << ", "; + OS << "/*SuffixIndex=*/" << Record.SuffixIndex << ", "; + OS << "/*OverloadedSuffixIndex=*/" << Record.OverloadedSuffixIndex << ", "; + OS << "/*PrototypeLength=*/" << (int)Record.PrototypeLength << ", "; + OS << "/*SuffixLength=*/" << (int)Record.SuffixLength << ", "; + OS << "/*OverloadedSuffixSize=*/" << (int)Record.OverloadedSuffixSize << ", "; + OS << "/*TypeRangeMask=*/" << (int)Record.TypeRangeMask << ", "; + OS << "/*Log2LMULMask=*/" << (int)Record.Log2LMULMask << ", "; + OS << "/*NF=*/" << (int)Record.NF << ", "; + OS << "/*HasMasked=*/" << (int)Record.HasMasked << ", "; + OS << "/*HasVL=*/" << (int)Record.HasVL << ", "; + OS << "/*HasMaskedOffOperand=*/" << (int)Record.HasMaskedOffOperand << ", "; + OS << "/*HasTailPolicy=*/" << (int)Record.HasTailPolicy << ", "; + OS << "/*HasMaskPolicy=*/" << (int)Record.HasMaskPolicy << ", "; + OS << "/*HasFRMRoundModeOp=*/" << (int)Record.HasFRMRoundModeOp << ", "; + OS << "/*IsTuple=*/" << (int)Record.IsTuple << ", "; + OS << "/*UnMaskedPolicyScheme=*/" << (PolicyScheme)Record.UnMaskedPolicyScheme + << ", "; + OS << "/*MaskedPolicyScheme=*/" << (PolicyScheme)Record.MaskedPolicyScheme + << ", "; OS << "},\n"; return OS; } diff --git a/clang/test/AST/ByteCode/codegen.cpp b/clang/test/AST/ByteCode/codegen.cpp index ea2c812f30f6..7c853a20362b 100644 --- a/clang/test/AST/ByteCode/codegen.cpp +++ b/clang/test/AST/ByteCode/codegen.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -o - %s -fexperimental-new-constant-interpreter | FileCheck %s #ifdef __SIZEOF_INT128__ @@ -95,3 +95,12 @@ void f(A *a) { // CHECK: call void @_ZN1AD1Ev( A::E e3 = A().Foo; } + +int notdead() { + auto l = [c=0]() 
mutable { + return c++ < 5 ? 10 : 12; + }; + return l(); +} +// CHECK: _ZZ7notdeadvEN3$_0clEv +// CHECK: ret i32 %cond diff --git a/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c b/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c index bbed683ac1fd..c3d0541229fa 100644 --- a/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c +++ b/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c @@ -52,9 +52,8 @@ vec2048 x2048 = {0, 1, 2, 3, 3 , 2 , 1, 0, 0, 1, 2, 3, 3 , 2 , 1, 0, typedef int8_t vec_int8 __attribute__((vector_size(N / 8))); // CHECK128-LABEL: define{{.*}} <16 x i8> @f2(<16 x i8> noundef %x) // CHECK128-NEXT: entry: -// CHECK128-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) // CHECK128-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> [[X:%.*]], i64 0) -// CHECK128-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> [[TMP0]], <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1) +// CHECK128-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1) // CHECK128-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[TMP1]], i64 0) // CHECK128-NEXT: ret <16 x i8> [[CASTFIXEDSVE]] @@ -62,9 +61,8 @@ typedef int8_t vec_int8 __attribute__((vector_size(N / 8))); // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<[[#div(VBITS,8)]] x i8>) align 16 captures(none) initializes((0, [[#div(VBITS,8)]])) %agg.result, ptr noundef readonly captures(none) %0) // CHECK-NEXT: entry: // CHECK-NEXT: [[X:%.*]] = load <[[#div(VBITS,8)]] x i8>, ptr [[TMP0:%.*]], align 16, [[TBAA6:!tbaa !.*]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> 
@llvm.aarch64.sve.ptrue.nxv16i1(i32 31) // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v[[#div(VBITS,8)]]i8(<vscale x 16 x i8> poison, <[[#div(VBITS,8)]] x i8> [[X]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1) // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <[[#div(VBITS,8)]] x i8> @llvm.vector.extract.v[[#div(VBITS,8)]]i8.nxv16i8(<vscale x 16 x i8> [[TMP2]], i64 0) // CHECK-NEXT: store <[[#div(VBITS,8)]] x i8> [[CASTFIXEDSVE]], ptr [[AGG_RESULT:%.*]], align 16, [[TBAA6]] // CHECK-NEXT: ret void diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c index 6bf56bdea505..ca3480d62725 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c @@ -7,14 +7,12 @@ // CHECK-LABEL: @test_svrdffr( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> splat (i1 true)) // CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]] // // CPP-CHECK-LABEL: @_Z12test_svrdffrv( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> 
@llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> splat (i1 true)) // CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]] // svbool_t test_svrdffr() diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c index c92aad633082..e5067c1c3b07 100644 --- a/clang/test/CodeGen/attr-target-x86.c +++ b/clang/test/CodeGen/attr-target-x86.c @@ -56,7 +56,7 @@ void f_default2(void) { __attribute__((target("avx, sse4.2, arch= ivybridge"))) void f_avx_sse4_2_ivybridge_2(void) {} -// CHECK: [[f_no_aes_ivybridge]] = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-amx-avx512,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-vaes" +// CHECK: [[f_no_aes_ivybridge]] = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" __attribute__((target("no-aes, arch=ivybridge"))) void f_no_aes_ivybridge(void) {} @@ -98,11 +98,11 @@ void f_x86_64_v3(void) {} __attribute__((target("arch=x86-64-v4"))) void f_x86_64_v4(void) {} -// CHECK: [[f_avx10_1_256]] = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,-amx-avx512,-avx10.1-512,-avx10.2-512,-evex512" +// CHECK: [[f_avx10_1_256]] = {{.*}}"target-cpu"="i686" 
"target-features"="+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,-amx-avx512,-avx10.1-512,-avx10.2-512,-evex512" __attribute__((target("avx10.1-256"))) void f_avx10_1_256(void) {} -// CHECK: [[f_avx10_1_512]] = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx10.1-512,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave" +// CHECK: [[f_avx10_1_512]] = {{.*}}"target-cpu"="i686" "target-features"="+avx,+avx10.1-256,+avx10.1-512,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" __attribute__((target("avx10.1-512"))) void f_avx10_1_512(void) {} @@ -112,4 +112,4 @@ void f_prefer_256_bit(void) {} // CHECK: [[f_no_prefer_256_bit]] = {{.*}}"target-features"="{{.*}}-prefer-256-bit __attribute__((target("no-prefer-256-bit"))) -void f_no_prefer_256_bit(void) {}
\ No newline at end of file +void f_no_prefer_256_bit(void) {} diff --git a/clang/test/CodeGen/builtin-maxnum-minnum.c b/clang/test/CodeGen/builtin-maxnum-minnum.c new file mode 100644 index 000000000000..69cec72495d3 --- /dev/null +++ b/clang/test/CodeGen/builtin-maxnum-minnum.c @@ -0,0 +1,171 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -x c++ -std=c++20 -disable-llvm-passes -O3 -triple x86_64 %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK + +typedef _Float16 half8 __attribute__((ext_vector_type(8))); +typedef __bf16 bf16x8 __attribute__((ext_vector_type(8))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef double double2 __attribute__((ext_vector_type(2))); +typedef long double ldouble2 __attribute__((ext_vector_type(2))); + +// CHECK-LABEL: define dso_local noundef <8 x half> @_Z7pfmin16Dv8_DF16_S_( +// CHECK-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16 +// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) +// CHECK-NEXT: ret <8 x half> [[ELT_MINNUM]] +// +half8 pfmin16(half8 a, half8 b) { + return __builtin_elementwise_minnum(a, b); +} +// CHECK-LABEL: define dso_local noundef <8 x bfloat> @_Z8pfmin16bDv8_DF16bS_( +// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]], <8 x bfloat> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: 
[[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16 +// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <8 x bfloat> @llvm.minnum.v8bf16(<8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]]) +// CHECK-NEXT: ret <8 x bfloat> [[ELT_MINNUM]] +// +bf16x8 pfmin16b(bf16x8 a, bf16x8 b) { + return __builtin_elementwise_minnum(a, b); +} +// CHECK-LABEL: define dso_local noundef <4 x float> @_Z7pfmin32Dv4_fS_( +// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: ret <4 x float> [[ELT_MINNUM]] +// +float4 pfmin32(float4 a, float4 b) { + return __builtin_elementwise_minnum(a, b); +} +// CHECK-LABEL: define dso_local noundef <2 x double> @_Z7pfmin64Dv2_dS_( +// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16 +// CHECK-NEXT: store <2 x 
double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: ret <2 x double> [[ELT_MINNUM]] +// +double2 pfmin64(double2 a, double2 b) { + return __builtin_elementwise_minnum(a, b); +} +// CHECK-LABEL: define dso_local noundef <2 x x86_fp80> @_Z7pfmin80Dv2_eS_( +// CHECK-SAME: ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP0:%.*]], ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 +// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x x86_fp80> @llvm.minnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]]) +// CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MINNUM]] +// +ldouble2 pfmin80(ldouble2 a, ldouble2 b) { + return __builtin_elementwise_minnum(a, b); +} + +// CHECK-LABEL: define dso_local noundef <8 x half> @_Z7pfmax16Dv8_DF16_S_( +// CHECK-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: 
[[A_ADDR:%.*]] = alloca <8 x half>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16 +// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) +// CHECK-NEXT: ret <8 x half> [[ELT_MAXNUM]] +// +half8 pfmax16(half8 a, half8 b) { + return __builtin_elementwise_maxnum(a, b); +} +// CHECK-LABEL: define dso_local noundef <8 x bfloat> @_Z8pfmax16bDv8_DF16bS_( +// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]], <8 x bfloat> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16 +// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <8 x bfloat> @llvm.maxnum.v8bf16(<8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]]) +// CHECK-NEXT: ret <8 x bfloat> [[ELT_MAXNUM]] +// +bf16x8 pfmax16b(bf16x8 a, bf16x8 b) { + return __builtin_elementwise_maxnum(a, b); +} +// CHECK-LABEL: define dso_local noundef <4 x float> @_Z7pfmax32Dv4_fS_( +// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: store <4 x float> [[A]], 
ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: ret <4 x float> [[ELT_MAXNUM]] +// +float4 pfmax32(float4 a, float4 b) { + return __builtin_elementwise_maxnum(a, b); +} +// CHECK-LABEL: define dso_local noundef <2 x double> @_Z7pfmax64Dv2_dS_( +// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16 +// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]] +// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: ret <2 x double> [[ELT_MAXNUM]] +// +double2 pfmax64(double2 a, double2 b) { + return __builtin_elementwise_maxnum(a, b); +} + +// CHECK-LABEL: define dso_local noundef <2 x x86_fp80> @_Z7pfmax80Dv2_eS_( +// CHECK-SAME: ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP0:%.*]], ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32 +// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[B:%.*]] = 
load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x x86_fp80> @llvm.minnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]]) +// CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MINNUM]] +// +ldouble2 pfmax80(ldouble2 a, ldouble2 b) { + return __builtin_elementwise_minnum(a, b); +} + +//. +// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"Simple C++ TBAA"} +//. diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl index 789aae7a5c34..49604c6c5e61 100644 --- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl +++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl @@ -546,12 +546,10 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // // // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( -// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { // AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// 
AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 -// AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 800, i1 false) // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR4]] // AMDGCN-NEXT: ret void // @@ -596,20 +594,15 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // // // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( -// AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { // AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr 
addrspace(5) [[TMP2]], align 8 -// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 -// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR4]] +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false) +// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP1]], align 8 +// AMDGCN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP3]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP2]], <2 x i32> [[TMP4]]) #[[ATTR4]] // AMDGCN-NEXT: ret void // // @@ -630,15 +623,10 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // // // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( -// AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { // AMDGCN-NEXT: 
[[ENTRY:.*:]] // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 480, i1 false) // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR4]] // AMDGCN-NEXT: ret void // @@ -868,15 +856,13 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // // // AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( -// AMDGCN20-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN20-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { // AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr -// AMDGCN20-NEXT: [[TMP0:%.*]] = 
getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 -// AMDGCN20-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8 -// AMDGCN20-NEXT: [[U1_ASCAST:%.*]] = addrspacecast ptr [[U1]] to ptr addrspace(5) -// AMDGCN20-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U1_ASCAST]]) #[[ATTR4]] +// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN20-NEXT: [[U_ASCAST:%.*]] = addrspacecast ptr [[U]] to ptr addrspace(5) +// AMDGCN20-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U_ASCAST]]) #[[ATTR4]] // AMDGCN20-NEXT: ret void // // @@ -927,21 +913,16 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // // // AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( -// AMDGCN20-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN20-SAME: ptr addrspace(4) noundef byref([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { // AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr 
-// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP0]], align 8 -// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 -// AMDGCN20-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN20-NEXT: store <2 x i32> [[TMP3]], ptr [[TMP2]], align 8 -// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8 -// AMDGCN20-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 -// AMDGCN20-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 -// AMDGCN20-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR4]] +// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false) +// AMDGCN20-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8 +// AMDGCN20-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 +// AMDGCN20-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[TMP3]], align 8 +// AMDGCN20-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP2]], <2 x i32> [[TMP4]]) #[[ATTR4]] // AMDGCN20-NEXT: ret void // // @@ -963,18 +944,13 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // // // AMDGCN20-LABEL: define dso_local amdgpu_kernel void 
@KernelLargeTwoMember( -// AMDGCN20-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN20-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { // AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr -// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN20-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8 -// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 -// AMDGCN20-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN20-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr [[TMP2]], align 8 -// AMDGCN20-NEXT: [[U1_ASCAST:%.*]] = addrspacecast ptr [[U1]] to ptr addrspace(5) -// AMDGCN20-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U1_ASCAST]]) #[[ATTR4]] +// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN20-NEXT: [[U_ASCAST:%.*]] = addrspacecast ptr [[U]] to ptr addrspace(5) +// 
AMDGCN20-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U_ASCAST]]) #[[ATTR4]] // AMDGCN20-NEXT: ret void // // @@ -1408,12 +1384,10 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // // // AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( -// AMDGCN30-GVAR-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-GVAR-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { // AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] // AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 -// AMDGCN30-GVAR-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 800, i1 false) // AMDGCN30-GVAR-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR4]] // AMDGCN30-GVAR-NEXT: ret void // @@ -1458,20 +1432,15 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // // // AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( -// AMDGCN30-GVAR-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) 
#[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-GVAR-SAME: ptr addrspace(4) noundef byref([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { // AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] // AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-GVAR-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 -// AMDGCN30-GVAR-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR4]] +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false) +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP1]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-GVAR-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP3]], align 8 +// AMDGCN30-GVAR-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP2]], <2 x i32> [[TMP4]]) #[[ATTR4]] // AMDGCN30-GVAR-NEXT: ret void // // @@ -1492,15 +1461,10 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // // // AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( -// AMDGCN30-GVAR-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-GVAR-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { // AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] // AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN30-GVAR-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 -// 
AMDGCN30-GVAR-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 480, i1 false) // AMDGCN30-GVAR-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR4]] // AMDGCN30-GVAR-NEXT: ret void // @@ -1699,12 +1663,10 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // // // AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( -// AMDGCN30-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { // AMDGCN30-NEXT: [[ENTRY:.*:]] // AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 -// AMDGCN30-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 800, i1 false) // AMDGCN30-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR4]] // AMDGCN30-NEXT: ret void // @@ -1749,20 +1711,15 @@ kernel void KernelLargeTwoMember(struct 
LargeStructTwoMember u) { // // // AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( -// AMDGCN30-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-SAME: ptr addrspace(4) noundef byref([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { // AMDGCN30-NEXT: [[ENTRY:.*:]] // AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN30-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN30-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 -// AMDGCN30-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 -// AMDGCN30-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 -// AMDGCN30-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR4]] +// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) 
align 8 [[TMP0]], i64 16, i1 false) +// AMDGCN30-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP1]], align 8 +// AMDGCN30-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP3]], align 8 +// AMDGCN30-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP2]], <2 x i32> [[TMP4]]) #[[ATTR4]] // AMDGCN30-NEXT: ret void // // @@ -1783,15 +1740,10 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // // // AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( -// AMDGCN30-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { // AMDGCN30-NEXT: [[ENTRY:.*:]] // AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN30-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 -// 
AMDGCN30-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 480, i1 false) // AMDGCN30-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR4]] // AMDGCN30-NEXT: ret void // diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl index 6dc488c40da7..7d0a66bac146 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 --include-generated-funcs +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -check-prefix=AMDGCN %s typedef int int2 __attribute__((ext_vector_type(2))); @@ -330,15 +330,13 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // // // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( -// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { // AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[U1:%.*]] 
= addrspacecast ptr addrspace(5) [[U]] to ptr -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 -// AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8 -// AMDGCN-NEXT: [[U1_ASCAST:%.*]] = addrspacecast ptr [[U1]] to ptr addrspace(5) -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U1_ASCAST]]) #[[ATTR4]] +// AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN-NEXT: [[U_ASCAST:%.*]] = addrspacecast ptr [[U]] to ptr addrspace(5) +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U_ASCAST]]) #[[ATTR4]] // AMDGCN-NEXT: ret void // // @@ -389,21 +387,16 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // // // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( -// AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { // AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[U1:%.*]] 
= addrspacecast ptr addrspace(5) [[U]] to ptr -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP0]], align 8 -// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 -// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr [[TMP2]], align 8 -// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8 -// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 -// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR4]] +// AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false) +// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8 +// AMDGCN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[TMP3]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP2]], <2 x i32> [[TMP4]]) #[[ATTR4]] // AMDGCN-NEXT: ret void // // @@ -425,18 +418,13 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // // // AMDGCN-LABEL: define dso_local amdgpu_kernel 
void @KernelLargeTwoMember( -// AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { // AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8 -// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 -// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr [[TMP2]], align 8 -// AMDGCN-NEXT: [[U1_ASCAST:%.*]] = addrspacecast ptr [[U1]] to ptr addrspace(5) -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U1_ASCAST]]) #[[ATTR4]] +// AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN-NEXT: [[U_ASCAST:%.*]] = addrspacecast ptr [[U]] to ptr addrspace(5) +// AMDGCN-NEXT: call void 
@__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U_ASCAST]]) #[[ATTR4]] // AMDGCN-NEXT: ret void // // diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl index 4d09fc3ffb70..06d3cdb01deb 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl @@ -214,7 +214,8 @@ typedef struct struct_4regs int w; } struct_4regs; -// CHECK: void @kernel_empty_struct_arg(%struct.empty_struct %s.coerce) +// CHECK: void @kernel_empty_struct_arg(ptr addrspace(4) noundef readnone byref(%struct.empty_struct) align 1 captures(none) {{%.+}}) +// CHECK: void @__clang_ocl_kern_imp_kernel_empty_struct_arg() __kernel void kernel_empty_struct_arg(empty_struct s) { } // CHECK: void @kernel_single_element_struct_arg(i32 %arg1.coerce) @@ -223,28 +224,35 @@ __kernel void kernel_single_element_struct_arg(single_element_struct_arg_t arg1) // CHECK: void @kernel_nested_single_element_struct_arg(i32 %arg1.coerce) __kernel void kernel_nested_single_element_struct_arg(nested_single_element_struct_arg_t arg1) { } -// CHECK: void @kernel_struct_arg(%struct.struct_arg %arg1.coerce) +// CHECK: void @kernel_struct_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_arg) align 4 captures(none) {{%.+}}) +// CHECK: void @__clang_ocl_kern_imp_kernel_struct_arg(i32 %arg1.coerce0, float %arg1.coerce1, i32 %arg1.coerce2) __kernel void kernel_struct_arg(struct_arg_t arg1) { } -// CHECK: void @kernel_struct_padding_arg(%struct.struct_padding_arg %arg1.coerce) +// CHECK: void @kernel_struct_padding_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_padding_arg) align 8 captures(none) {{%.+}}) +// CHECK: void @__clang_ocl_kern_imp_kernel_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1) __kernel void kernel_struct_padding_arg(struct_padding_arg arg1) { } -// CHECK: void 
@kernel_test_struct_of_arrays_arg(%struct.struct_of_arrays_arg %arg1.coerce) +// CHECK: void @kernel_test_struct_of_arrays_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_of_arrays_arg) align 4 captures(none) {{%.+}}) +// CHECK: void @__clang_ocl_kern_imp_kernel_test_struct_of_arrays_arg([2 x i32] %arg1.coerce0, float %arg1.coerce1, [4 x i32] %arg1.coerce2, [3 x float] %arg1.coerce3, i32 %arg1.coerce4) __kernel void kernel_test_struct_of_arrays_arg(struct_of_arrays_arg_t arg1) { } -// CHECK: void @kernel_struct_of_structs_arg(%struct.struct_of_structs_arg %arg1.coerce) +// CHECK: void @kernel_struct_of_structs_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_of_structs_arg) align 4 captures(none) {{%.+}}) +// CHECK: void @__clang_ocl_kern_imp_kernel_struct_of_structs_arg(i32 %arg1.coerce0, float %arg1.coerce1, %struct.struct_arg %arg1.coerce2, i32 %arg1.coerce3) __kernel void kernel_struct_of_structs_arg(struct_of_structs_arg_t arg1) { } // CHECK: void @test_kernel_transparent_union_arg(i32 %u.coerce) __kernel void test_kernel_transparent_union_arg(transparent_u u) { } -// CHECK: void @kernel_single_array_element_struct_arg(%struct.single_array_element_struct_arg %arg1.coerce) +// CHECK: void @kernel_single_array_element_struct_arg(ptr addrspace(4) noundef readonly byref(%struct.single_array_element_struct_arg) align 4 captures(none) {{%.+}}) +// CHECK: void @__clang_ocl_kern_imp_kernel_single_array_element_struct_arg([4 x i32] %arg1.coerce) __kernel void kernel_single_array_element_struct_arg(single_array_element_struct_arg_t arg1) { } -// CHECK: void @kernel_single_struct_element_struct_arg(%struct.single_struct_element_struct_arg %arg1.coerce) +// CHECK: void @kernel_single_struct_element_struct_arg(ptr addrspace(4) noundef readonly byref(%struct.single_struct_element_struct_arg) align 8 captures(none) {{%.+}}) +// CHECK: void @__clang_ocl_kern_imp_kernel_single_struct_element_struct_arg(%struct.inner %arg1.coerce) __kernel void 
kernel_single_struct_element_struct_arg(single_struct_element_struct_arg_t arg1) { } -// CHECK: void @kernel_different_size_type_pair_arg(%struct.different_size_type_pair %arg1.coerce) +// CHECK: void @kernel_different_size_type_pair_arg(ptr addrspace(4) noundef readonly byref(%struct.different_size_type_pair) align 8 captures(none) {{%.+}}) +// CHECK: void @__clang_ocl_kern_imp_kernel_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1) __kernel void kernel_different_size_type_pair_arg(different_size_type_pair arg1) { } // CHECK: define{{.*}} void @func_f32_arg(float noundef %arg) diff --git a/clang/test/CodeGenOpenCL/opencl-kernel-call.cl b/clang/test/CodeGenOpenCL/opencl-kernel-call.cl index cdbe510b723b..a5b2bee127bd 100644 --- a/clang/test/CodeGenOpenCL/opencl-kernel-call.cl +++ b/clang/test/CodeGenOpenCL/opencl-kernel-call.cl @@ -676,12 +676,10 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( -// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META7]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 -// 
AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 800, i1 false) // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR5]] // AMDGCN-NEXT: ret void // @@ -698,20 +696,15 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( -// AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META7]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 -// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 
-// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 -// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false) +// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP1]], align 8 +// AMDGCN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP3]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP2]], <2 x i32> [[TMP4]]) #[[ATTR5]] // AMDGCN-NEXT: ret void // // @@ -734,15 +727,10 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( -// AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META7]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, 
addrspace(5) -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 480, i1 false) // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR5]] // AMDGCN-NEXT: ret void // @@ -815,28 +803,23 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local amdgpu_kernel void @caller_kern2( -// AMDGCN-SAME: <2 x i32> [[STRUCTONEMEM_COERCE:%.*]], ptr addrspace(1) noundef align 8 [[GLOBAL_STRUCTONEMEM:%.*]], [[STRUCT_STRUCTTWOMEMBER:%.*]] [[STRUCTTWOMEM_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META22:![0-9]+]] !kernel_arg_access_qual [[META23:![0-9]+]] !kernel_arg_type [[META24:![0-9]+]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25:![0-9]+]] { +// AMDGCN-SAME: <2 x i32> [[STRUCTONEMEM_COERCE:%.*]], ptr addrspace(1) noundef align 8 [[GLOBAL_STRUCTONEMEM:%.*]], ptr addrspace(4) noundef byref([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META22:![0-9]+]] !kernel_arg_access_qual [[META23:![0-9]+]] !kernel_arg_type [[META24:![0-9]+]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25:![0-9]+]] { // 
AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[STRUCTONEMEM:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) // AMDGCN-NEXT: [[STRUCTTWOMEM:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: [[GLOBAL_STRUCTONEMEM_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[STRUCTONEMEM]], i32 0, i32 0 // AMDGCN-NEXT: store <2 x i32> [[STRUCTONEMEM_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[STRUCTTWOMEM_COERCE]], 0 -// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1 -// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[STRUCTTWOMEM_COERCE]], 1 -// AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[STRUCTTWOMEM]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false) // AMDGCN-NEXT: store ptr addrspace(1) [[GLOBAL_STRUCTONEMEM]], ptr addrspace(5) [[GLOBAL_STRUCTONEMEM_ADDR]], align 8 -// AMDGCN-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[GLOBAL_STRUCTONEMEM_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[GLOBAL_STRUCTONEMEM_ADDR]], align 8 // AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[STRUCTONEMEM]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 -// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) 
[[STRUCTTWOMEM]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 -// AMDGCN-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1 -// AMDGCN-NEXT: [[TMP9:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP8]], align 8 -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_caller_kern2(<2 x i32> [[TMP5]], ptr addrspace(1) noundef align 8 [[TMP4]], <2 x i32> [[TMP7]], <2 x i32> [[TMP9]]) #[[ATTR5]] +// AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 +// AMDGCN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP3]], align 8 +// AMDGCN-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP5]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_caller_kern2(<2 x i32> [[TMP2]], ptr addrspace(1) noundef align 8 [[TMP1]], <2 x i32> [[TMP4]], <2 x i32> [[TMP6]]) #[[ATTR5]] // AMDGCN-NEXT: ret void // // @@ -875,19 +858,12 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local amdgpu_kernel void @caller_kern3( -// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[LARGESTRUCTONEMEM_COERCE:%.*]], [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[LARGESTRUCTTWOMEM_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META26:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META27:![0-9]+]] !kernel_arg_base_type [[META27]] !kernel_arg_type_qual [[META11]] { +// AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]], ptr addrspace(4) noundef 
byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP1:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META26:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META27:![0-9]+]] !kernel_arg_base_type [[META27]] !kernel_arg_type_qual [[META11]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[LARGESTRUCTONEMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: [[LARGESTRUCTTWOMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[LARGESTRUCTONEMEM]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[LARGESTRUCTONEMEM_COERCE]], 0 -// AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[LARGESTRUCTTWOMEM]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[LARGESTRUCTTWOMEM_COERCE]], 0 -// AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 -// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[LARGESTRUCTTWOMEM]], i32 0, i32 1 -// AMDGCN-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[LARGESTRUCTTWOMEM_COERCE]], 1 -// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP5]], ptr addrspace(5) [[TMP4]], align 8 +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[LARGESTRUCTONEMEM]], ptr addrspace(4) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[LARGESTRUCTTWOMEM]], ptr addrspace(4) align 8 [[TMP1]], i64 480, i1 false) // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_caller_kern3(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[LARGESTRUCTONEMEM]], ptr addrspace(5) noundef 
byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[LARGESTRUCTTWOMEM]]) #[[ATTR5]] // AMDGCN-NEXT: ret void // diff --git a/clang/test/Driver/amdgpu-openmp-toolchain.c b/clang/test/Driver/amdgpu-openmp-toolchain.c index 743f4e74696c..1091e6e372ac 100644 --- a/clang/test/Driver/amdgpu-openmp-toolchain.c +++ b/clang/test/Driver/amdgpu-openmp-toolchain.c @@ -54,12 +54,12 @@ // RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx803 \ // RUN: --no-offloadlib --offloadlib --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=CHECK-LIB-DEVICE -// CHECK-LIB-DEVICE: "-cc1" {{.*}}ocml.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc" +// CHECK-LIB-DEVICE: "-cc1" {{.*}}ocml.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc" // RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx803 -nogpulib \ // RUN: --offloadlib --no-offloadlib --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=CHECK-LIB-DEVICE-NOGPULIB -// CHECK-LIB-DEVICE-NOGPULIB-NOT: "-cc1" {{.*}}ocml.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc" +// CHECK-LIB-DEVICE-NOGPULIB-NOT: "-cc1" {{.*}}ocml.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc" // RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a:sramecc-:xnack+ \ // RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID diff --git a/clang/test/Driver/arm-fpu-selection.s 
b/clang/test/Driver/arm-fpu-selection.s new file mode 100644 index 000000000000..6af374d04905 --- /dev/null +++ b/clang/test/Driver/arm-fpu-selection.s @@ -0,0 +1,36 @@ +// REQUIRES: arm-registered-target +/// Ensures that when targeting an ARM target with an Asm file, clang +/// collects the features from the FPU. This is critical in the +/// activation of NEON for supported targets. The Cortex-R52 will be +/// used and tested for VFP and NEON Support + +// RUN: %clang -target arm-none-eabi -mcpu=cortex-r52 -c %s -o /dev/null 2>&1 | FileCheck --check-prefix=CHECK-STDERR %s --allow-empty +// RUN: %clang -target arm-none-eabi -mcpu=cortex-r52 -c %s -o /dev/null -### 2>&1 | FileCheck --check-prefix=CHECK-TARGET-FEATURES %s + +/// Check that no errors or warnings are present when assembling using cc1as. +// CHECK-STDERR-NOT: error: +// CHECK-STDERR-NOT: warning: + +/// Check that NEON and VFPV5 have been activated when using Cortex-R52 when using cc1as +// CHECK-TARGET-FEATURES: "-target-feature" "+vfp2sp" +// CHECK-TARGET-FEATURES: "-target-feature" "+vfp3" +// CHECK-TARGET-FEATURES: "-target-feature" "+fp-armv8" +// CHECK-TARGET-FEATURES: "-target-feature" "+fp-armv8d16" +// CHECK-TARGET-FEATURES: "-target-feature" "+fp-armv8d16sp" +// CHECK-TARGET-FEATURES: "-target-feature" "+fp-armv8sp" +// CHECK-TARGET-FEATURES: "-target-feature" "+neon" + + vadd.f32 s0, s1, s2 + vadd.f64 d0, d1, d2 + vcvt.u32.f32 s0, s0, #1 + vcvt.u32.f64 d0, d0, #1 + vcvtb.f32.f16 s0, s1 + vcvtb.f64.f16 d0, s1 + vfma.f32 s0, s1, s2 + vfma.f64 d0, d1, d2 + vcvta.u32.f32 s0, s1 + vcvta.u32.f64 s0, d1 + vadd.f32 q0, q1, q2 + vcvt.f32.f16 q0, d1 + vfma.f32 q0, q1, q2 + vcvta.u32.f32 q0, q1 diff --git a/clang/test/Driver/armv7-default-neon.s b/clang/test/Driver/armv7-default-neon.s new file mode 100644 index 000000000000..2015f0bc429d --- /dev/null +++ b/clang/test/Driver/armv7-default-neon.s @@ -0,0 +1,16 @@ +/// Ensure that we can assemble NEON by just specifying an armv7 +/// Apple or Windows 
target. + +// REQUIRES: arm-registered-target +// RUN: %clang -c -target armv7-apple-darwin -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-STDERR %s --allow-empty +// RUN: %clang -c -target armv7-apple-darwin -o /dev/null %s -### 2>&1 | FileCheck --check-prefix=CHECK-TARGET-FEATURES %s +// RUN: %clang -c -target armv7-windows -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-STDERR %s --allow-empty +// RUN: %clang -c -target armv7-windows -o /dev/null %s -### 2>&1 | FileCheck --check-prefix=CHECK-TARGET-FEATURES %s + +/// Check that no errors or warnings are present when assembling using cc1as. +// CHECK-STDERR-NOT: error: +// CHECK-STDERR-NOT: warning: + +// CHECK-TARGET-FEATURES: "-target-feature" "+neon" + +vadd.i32 q0, q0, q0 diff --git a/clang/test/Driver/armv7s-default-vfpv4.s b/clang/test/Driver/armv7s-default-vfpv4.s new file mode 100644 index 000000000000..3e16503e6316 --- /dev/null +++ b/clang/test/Driver/armv7s-default-vfpv4.s @@ -0,0 +1,13 @@ +/// Ensure that we can assemble VFPv4 by just specifying an armv7s target. + +// REQUIRES: arm-registered-target +// RUN: %clang -c -target armv7s-apple-darwin -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-STDERR %s --allow-empty +// RUN: %clang -c -target armv7s-apple-darwin -o /dev/null %s -### 2>&1 | FileCheck --check-prefix=CHECK-TARGET-FEATURES %s + +/// Check that no errors or warnings are present when assembling using cc1as. 
+// CHECK-STDERR-NOT: error: +// CHECK-STDERR-NOT: warning: + +// CHECK-TARGET-FEATURES: "-target-feature" "+vfp4" + +vfma.f32 q1, q2, q3 diff --git a/clang/test/Driver/armv8.1m.main.s b/clang/test/Driver/armv8.1m.main.s index 8fc94cf772fa..a660e56c2d1e 100644 --- a/clang/test/Driver/armv8.1m.main.s +++ b/clang/test/Driver/armv8.1m.main.s @@ -8,21 +8,21 @@ # RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+fp -o /dev/null %s 2>%t # RUN: FileCheck --check-prefix=ERROR-V81M_FP < %t %s # RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+nofp -o /dev/null %s 2>%t -# RUN: FileCheck --check-prefix=ERROR-V81M_FP < %t %s +# RUN: FileCheck --check-prefix=ERROR-V81M_NOFP < %t %s # RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+fp.dp -o /dev/null %s 2>%t # RUN: FileCheck --check-prefix=ERROR-V81M_FPDP < %t %s # RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+nofp.dp -o /dev/null %s 2>%t -# RUN: FileCheck --check-prefix=ERROR-V81M_FPDP < %t %s +# RUN: FileCheck --check-prefix=ERROR-V81M_NOFPDP < %t %s # RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+mve -o /dev/null %s 2>%t # RUN: FileCheck --check-prefix=ERROR-V81M_MVE < %t %s # RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+nomve -o /dev/null %s 2>%t -# RUN: FileCheck --check-prefix=ERROR-V81M_MVE < %t %s +# RUN: FileCheck --check-prefix=ERROR-V81M_NOMVE < %t %s # RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+mve+fp -o /dev/null %s 2>%t # RUN: FileCheck --check-prefix=ERROR-V81M_MVE_FP < %t %s # RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+mve.fp -o /dev/null %s 2>%t # RUN: FileCheck --check-prefix=ERROR-V81M_MVEFP < %t %s # RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+nomve.fp -o /dev/null %s 2>%t -# RUN: FileCheck --check-prefix=ERROR-V81M_MVEFP < %t %s +# RUN: FileCheck --check-prefix=ERROR-V81M_NOMVEFP < %t %s .syntax unified .thumb @@ 
-35,28 +35,41 @@ qadd r0, r1, r2 # ERROR-V8M: :[[@LINE-1]]:1: error # ERROR-V81M: :[[@LINE-2]]:1: error # ERROR-V81M_FP: :[[@LINE-3]]:1: error -# ERROR-V81M_FPDP: :[[@LINE-4]]:1: error +# ERROR-V81M_NOFP: :[[@LINE-4]]:1: error +# ERROR-V81M_FPDP: :[[@LINE-5]]:1: error +# ERROR-V81M_NOFPDP: :[[@LINE-6]]:1: error +# ERROR-V81M_NOMVE: :[[@LINE-7]]:1: error +# ERROR-V81M_NOMVEFP: :[[@LINE-8]]:1: error vadd.f16 s0, s1, s2 # ERROR-V8M: :[[@LINE-1]]:1: error -# ERROR-V81M: :[[@LINE-2]]:1: error -# ERROR-V81M_DSP: :[[@LINE-3]]:1: error -# ERROR-V81M_MVE: :[[@LINE-4]]:1: error +# ERROR-V81M_NOFP: :[[@LINE-2]]:1: error vabs.f32 s0, s1 -# ERROR-V8M: :[[@LINE-1]]:1: error -# ERROR-V81M: :[[@LINE-2]]:1: error -# ERROR-V81M_DSP: :[[@LINE-3]]:1: error -# ERROR-V81M_MVE: :[[@LINE-4]]:1: error +# ERROR-V81M_NOFP: :[[@LINE-1]]:1: error -vcmp.f64 d0,d1 +vabs.s32 q0, q1 # ERROR-V8M: :[[@LINE-1]]:1: error # ERROR-V81M: :[[@LINE-2]]:1: error # ERROR-V81M_DSP: :[[@LINE-3]]:1: error # ERROR-V81M_FP: :[[@LINE-4]]:1: error -# ERROR-V81M_MVE: :[[@LINE-5]]:1: error -# ERROR-V81M_MVE_FP: :[[@LINE-6]]:1: error -# ERROR-V81M_MVEFP: :[[@LINE-7]]:1: error +# ERROR-V81M_NOFP: :[[@LINE-5]]:1: error +# ERROR-V81M_FPDP: :[[@LINE-6]]:1: error +# ERROR-V81M_NOFPDP: :[[@LINE-7]]:1: error +# ERROR-V81M_NOMVE: :[[@LINE-8]]:1: error +# ERROR-V81M_NOMVEFP: :[[@LINE-9]]:1: error + +vcmp.f64 d0,d1 +# ERROR-V81M: :[[@LINE-1]]:1: error +# ERROR-V81M_DSP: :[[@LINE-2]]:1: error +# ERROR-V81M_FP: :[[@LINE-3]]:1: error +# ERROR-V81M_NOFP: :[[@LINE-4]]:1: error +# ERROR-V81M_NOFPDP: :[[@LINE-5]]:1: error +# ERROR-V81M_MVE: :[[@LINE-6]]:1: error +# ERROR-V81M_NOMVE: :[[@LINE-7]]:1: error +# ERROR-V81M_MVE_FP: :[[@LINE-8]]:1: error +# ERROR-V81M_MVEFP: :[[@LINE-9]]:1: error +# ERROR-V81M_NOMVEFP: :[[@LINE-10]]:1: error asrl r0, r1, r2 # ERROR-V8M: :[[@LINE-1]]:1: error @@ -64,6 +77,9 @@ asrl r0, r1, r2 # ERROR-V81M_DSP: :[[@LINE-3]]:1: error # ERROR-V81M_FP: :[[@LINE-4]]:1: error # ERROR-V81M_FPDP: :[[@LINE-5]]:1: 
error +# ERROR-V81M_NOFPDP: :[[@LINE-6]]:1: error +# ERROR-V81M_NOMVE: :[[@LINE-7]]:1: error +# ERROR-V81M_NOMVEFP: :[[@LINE-8]]:1: error vcadd.i8 q0, q1, q2, #90 # ERROR-V8M: :[[@LINE-1]]:1: error @@ -71,3 +87,6 @@ vcadd.i8 q0, q1, q2, #90 # ERROR-V81M_DSP: :[[@LINE-3]]:1: error # ERROR-V81M_FP: :[[@LINE-4]]:1: error # ERROR-V81M_FPDP: :[[@LINE-5]]:1: error +# ERROR-V81M_NOFPDP: :[[@LINE-6]]:1: error +# ERROR-V81M_NOMVE: :[[@LINE-7]]:1: error +# ERROR-V81M_NOMVEFP: :[[@LINE-8]]:1: error diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip index b3829114138c..effce40d67eb 100644 --- a/clang/test/Driver/hip-device-libs.hip +++ b/clang/test/Driver/hip-device-libs.hip @@ -6,7 +6,7 @@ // RUN: --cuda-gpu-arch=gfx803 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD,ROCMDIR // Test subtarget with flushing off by ddefault. @@ -14,7 +14,7 @@ // RUN: --cuda-gpu-arch=gfx900 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD,ROCMDIR // Test explicit flag, opposite of target default. @@ -23,7 +23,7 @@ // RUN: -fgpu-flush-denormals-to-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD,ROCMDIR // Test explicit flag, opposite of target default. @@ -32,7 +32,7 @@ // RUN: -fno-gpu-flush-denormals-to-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD,ROCMDIR // Test explicit flag, same as target default. 
@@ -41,7 +41,7 @@ // RUN: -fno-gpu-flush-denormals-to-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD,ROCMDIR // Test explicit flag, same as target default. @@ -50,7 +50,7 @@ // RUN: -fgpu-flush-denormals-to-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD,ROCMDIR // Test last flag wins, not flushing @@ -59,7 +59,7 @@ // RUN: -fgpu-flush-denormals-to-zero -fno-gpu-flush-denormals-to-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD,ROCMDIR // RUN: %clang -### --target=x86_64-linux-gnu \ @@ -67,7 +67,7 @@ // RUN: -fgpu-flush-denormals-to-zero -fno-gpu-flush-denormals-to-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD,ROCMDIR // RUN: %clang -### --target=x86_64-linux-gnu \ @@ -75,7 +75,7 @@ // RUN: -fno-gpu-flush-denormals-to-zero -fgpu-flush-denormals-to-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD,ROCMDIR // RUN: %clang -### --target=x86_64-linux-gnu \ @@ -83,21 +83,21 @@ // RUN: -fno-gpu-flush-denormals-to-zero -fgpu-flush-denormals-to-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD,ROCMDIR // Test finding device lib in resource dir 
// RUN: %clang -### --target=x86_64-linux-gnu \ // RUN: --offload-arch=gfx803 -nogpuinc \ // RUN: -resource-dir=%S/Inputs/rocm_resource_dir \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,RESDIR +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD,RESDIR // Test --hip-device-lib-path flag // RUN: %clang -### --target=x86_64-linux-gnu \ // RUN: --cuda-gpu-arch=gfx803 -nogpuinc \ // RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD,ROCMDIR // Test --hip-device-lib-path wins over -resource-dir // RUN: %clang -### --target=x86_64-linux-gnu \ @@ -105,7 +105,7 @@ // RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode \ // RUN: -resource-dir=%S/Inputs/rocm_resource_dir \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD,ROCMDIR // Test environment variable HIP_DEVICE_LIB_PATH // RUN: env HIP_DEVICE_LIB_PATH=%S/Inputs/rocm/amdgcn/bitcode \ @@ -213,6 +213,9 @@ // ALL-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR]]ockl.bc" +// FLUSHD-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR]]oclc_daz_opt_on.bc" +// NOFLUSHD-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR]]oclc_daz_opt_off.bc" + // ALL-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR]]oclc_unsafe_math_off.bc" // ALL-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR]]oclc_finite_only_off.bc" // ALL-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR]]oclc_correctly_rounded_sqrt_on.bc" @@ -220,19 +223,23 @@ // ALL-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR]]oclc_isa_version_{{[0-9]+}}.bc" // INST-SAME: "-mlink-builtin-bitcode" "{{.*}}instrument.bc" -// FAST: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_on.bc" +// FAST: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc" +// 
FAST-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_on.bc" // FAST-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_on.bc" // FAST-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc" -// FINITE: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_off.bc" +// FINITE: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc" +// FINITE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_off.bc" // FINITE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_on.bc" // FINITE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc" -// UNSAFE: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_on.bc" +// UNSAFE: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc" +// UNSAFE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_on.bc" // UNSAFE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_off.bc" // UNSAFE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc" -// DIVSQRT: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_off.bc" +// DIVSQRT: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc" +// DIVSQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_off.bc" // DIVSQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_off.bc" // DIVSQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_off.bc" diff --git a/clang/test/Driver/rocm-device-libs.cl b/clang/test/Driver/rocm-device-libs.cl index 7aee10bf1556..f9766e6fa4d9 100644 --- a/clang/test/Driver/rocm-device-libs.cl +++ b/clang/test/Driver/rocm-device-libs.cl @@ -6,7 +6,7 @@ // RUN: -x cl -mcpu=gfx900 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s @@ -15,7 +15,7 @@ // RUN: -x cl -mcpu=gfx803 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX803,WAVE64 %s +// RUN: 2>&1 
| FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s @@ -24,7 +24,7 @@ // RUN: -x cl -mcpu=fiji \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s @@ -33,7 +33,7 @@ // RUN: -cl-denorms-are-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,GFX900,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DAZ,GFX900,WAVE64 %s // RUN: %clang -### -target amdgcn-amd-amdhsa \ @@ -41,7 +41,7 @@ // RUN: -cl-denorms-are-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DAZ,GFX803,WAVE64 %s @@ -124,13 +124,13 @@ // RUN: -x cl -mcpu=gfx900 \ // RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode \ // RUN: %S/opencl.cl \ -// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s // Test environment variable HIP_DEVICE_LIB_PATH // RUN: env HIP_DEVICE_LIB_PATH=%S/Inputs/rocm/amdgcn/bitcode %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx900 \ // RUN: %S/opencl.cl \ -// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s // RUN: %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx908:xnack+ -fsanitize=address \ @@ -150,6 +150,11 @@ // COMMON-SAME: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/ocml.bc" // COMMON-SAME: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/ockl.bc" +// GFX900-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_daz_opt_off.bc" +// GFX803-DEFAULT-SAME: 
"-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_daz_opt_on.bc" +// GFX700-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_daz_opt_on.bc" +// COMMON-DAZ-SAME: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_daz_opt_on.bc" + // COMMON-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_unsafe_math_off.bc" // COMMON-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_finite_only_off.bc" diff --git a/clang/test/Modules/relocatable-modules.cpp b/clang/test/Modules/relocatable-modules.cpp new file mode 100644 index 000000000000..c8d1e6d45566 --- /dev/null +++ b/clang/test/Modules/relocatable-modules.cpp @@ -0,0 +1,54 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// RUN: cd %t + +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header hu-01.h \ +// RUN: -fmodule-name=hu-01 -o hu-01.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header hu-02.h \ +// RUN: -Wno-experimental-header-units -fmodule-file=hu-01.pcm -o hu-02-abs.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header hu-02.h \ +// RUN: -Wno-experimental-header-units -fmodule-file=hu-01.pcm -o hu-02-rel.pcm \ +// RUN: -fmodule-file-home-is-cwd + +// RUN: %clang -module-file-info hu-02-abs.pcm | FileCheck %s --check-prefix=IMPORT-ABS -DPREFIX=%t +// IMPORT-ABS: Imports module 'hu-01': [[PREFIX]]{{/|\\}}hu-01.pcm + +// RUN: %clang -module-file-info hu-02-rel.pcm | FileCheck %s --check-prefix=IMPORT-REL +// IMPORT-REL: Imports module 'hu-01': hu-01.pcm + +// RUN: llvm-bcanalyzer --dump --disable-histogram %t/hu-02-abs.pcm \ +// RUN: | FileCheck %s --check-prefix=INPUT-ABS -DPREFIX=%t +// INPUT-ABS: <INPUT_FILE {{.*}}/> blob data = '[[PREFIX]]{{/|\\}}hu-02.h' + +// RUN: llvm-bcanalyzer --dump --disable-histogram %t/hu-02-rel.pcm \ +// RUN: | FileCheck %s --check-prefix=INPUT-REL +// INPUT-REL: <INPUT_FILE {{.*}}/> blob data = 'hu-02.h' + +//--- hu-01.h +inline void f() {} + +//--- hu-02.h 
+import "hu-01.h"; + +inline void g() { + f(); +} + +// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/a.cppm -o %t/a-abs.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/a.cppm -o %t/a-rel.pcm \ +// RUN: -fmodule-file-home-is-cwd + +// RUN: llvm-bcanalyzer --dump --disable-histogram %t/a-abs.pcm \ +// RUN: | FileCheck %s --check-prefix=M-INPUT-ABS -DPREFIX=%t +// M-INPUT-ABS: <INPUT_FILE {{.*}}/> blob data = '[[PREFIX]]{{/|\\}}a.cppm' + +// RUN: llvm-bcanalyzer --dump --disable-histogram %t/a-rel.pcm \ +// RUN: | FileCheck %s --check-prefix=M-INPUT-REL +// M-INPUT-REL: <INPUT_FILE {{.*}}/> blob data = 'a.cppm' + +//--- a.cppm +export module a; diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index bf3260c6216d..b62d49e17c83 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -20207,6 +20207,16 @@ TEST_F(FormatTest, AlignConsecutiveDeclarations) { "double b();", AlignmentLeft); + auto Style = AlignmentLeft; + Style.AlignConsecutiveDeclarations.AlignFunctionPointers = true; + Style.BinPackParameters = FormatStyle::BPPS_OnePerLine; + verifyFormat("int function_name(const wchar_t* title,\n" + " int x = 0,\n" + " long extraStyle = 0,\n" + " bool readOnly = false,\n" + " FancyClassType* module = nullptr);", + Style); + // PAS_Middle FormatStyle AlignmentMiddle = Alignment; AlignmentMiddle.PointerAlignment = FormatStyle::PAS_Middle; @@ -20438,7 +20448,7 @@ TEST_F(FormatTest, AlignConsecutiveDeclarations) { Alignment); // See PR37175 - FormatStyle Style = getMozillaStyle(); + Style = getMozillaStyle(); Style.AlignConsecutiveDeclarations.Enabled = true; verifyFormat("DECOR1 /**/ int8_t /**/ DECOR2 /**/\n" "foo(int a);", @@ -23712,6 +23722,7 @@ TEST_F(FormatTest, FormatsLambdas) { verifyFormat("function([]() { return b; })", MergeInline); verifyFormat("function([]() { return b; }, a)", MergeInline); verifyFormat("function(a, []() { return b; })", MergeInline); + 
verifyFormat("auto guard = foo{[&] { exit_status = true; }};", MergeInline); // Check option "BraceWrapping.BeforeLambdaBody" and different state of // AllowShortLambdasOnASingleLine diff --git a/clang/utils/TableGen/ClangOpcodesEmitter.cpp b/clang/utils/TableGen/ClangOpcodesEmitter.cpp index 64534a50877e..5d6d90994cf3 100644 --- a/clang/utils/TableGen/ClangOpcodesEmitter.cpp +++ b/clang/utils/TableGen/ClangOpcodesEmitter.cpp @@ -171,16 +171,12 @@ void ClangOpcodesEmitter::EmitDisasm(raw_ostream &OS, StringRef N, OS << "#ifdef GET_DISASM\n"; Enumerate(R, N, [R, &OS](ArrayRef<const Record *>, const Twine &ID) { OS << "case OP_" << ID << ":\n"; - OS << " PrintName(\"" << ID << "\");\n"; - OS << " OS << \"\\t\""; + OS << " Text.Op = PrintName(\"" << ID << "\");\n"; + for (const auto *Arg : R->getValueAsListOfDefs("Args")) + OS << " Text.Args.push_back(printArg<" << Arg->getValueAsString("Name") + << ">(P, PC));\n"; - for (const auto *Arg : R->getValueAsListOfDefs("Args")) { - OS << " << ReadArg<" << Arg->getValueAsString("Name") << ">(P, PC)"; - OS << " << \" \""; - } - - OS << " << \"\\n\";\n"; - OS << " continue;\n"; + OS << " break;\n"; }); OS << "#endif\n"; } diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp index 02e5e51f6d09..8d94ec3d920d 100644 --- a/clang/utils/TableGen/RISCVVEmitter.cpp +++ b/clang/utils/TableGen/RISCVVEmitter.cpp @@ -45,7 +45,7 @@ struct SemaRecord { unsigned Log2LMULMask; // Required extensions for this intrinsic. - uint32_t RequiredExtensions[(RVV_REQ_NUM + 31) / 32]; + RequiredExtensionBits RequiredExtensions; // Prototype for this intrinsic. 
SmallVector<PrototypeDescriptor> Prototype; @@ -769,7 +769,6 @@ void RVVEmitter::createRVVIntrinsics( SR.Log2LMULMask = Log2LMULMask; - memset(SR.RequiredExtensions, 0, sizeof(SR.RequiredExtensions)); for (auto RequiredFeature : RequiredFeatures) { unsigned RequireExt = StringSwitch<RVVRequire>(RequiredFeature) @@ -793,7 +792,7 @@ void RVVEmitter::createRVVIntrinsics( .Case("Zvfbfmin", RVV_REQ_Zvfbfmin) .Case("Zvfh", RVV_REQ_Zvfh) .Case("Experimental", RVV_REQ_Experimental); - SR.RequiredExtensions[RequireExt / 32] |= 1U << (RequireExt % 32); + SR.RequiredExtensions.set(RequireExt); } SR.NF = NF; @@ -837,8 +836,7 @@ void RVVEmitter::createRVVIntrinsicRecords(std::vector<RVVIntrinsicRecord> &Out, R.PrototypeLength = SR.Prototype.size(); R.SuffixLength = SR.Suffix.size(); R.OverloadedSuffixSize = SR.OverloadedSuffix.size(); - memcpy(R.RequiredExtensions, SR.RequiredExtensions, - sizeof(R.RequiredExtensions)); + R.RequiredExtensions = SR.RequiredExtensions; R.TypeRangeMask = SR.TypeRangeMask; R.Log2LMULMask = SR.Log2LMULMask; R.NF = SR.NF; diff --git a/flang/test/Driver/flang-ld-powerpc.f90 b/flang/test/Driver/flang-ld-powerpc.f90 index 9a6ee453a22e..a58cb1629cad 100644 --- a/flang/test/Driver/flang-ld-powerpc.f90 +++ b/flang/test/Driver/flang-ld-powerpc.f90 @@ -7,35 +7,100 @@ !! LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=ON, use !! resource_dir_with_per_target_subdir as inputs. -! Check powerpc64-ibm-aix 64-bit linking to static flang-rt -! RUN: %flang %s -### 2>&1 \ +! Check powerpc64-ibm-aix 64-bit linking to static flang-rt by default +! RUN: %flang -Werror %s -### 2>&1 \ ! RUN: --target=powerpc64-ibm-aix \ ! RUN: -resource-dir=%S/../../../clang/test/Driver/Inputs/resource_dir_with_per_target_subdir \ -! RUN: | FileCheck %s --check-prefix=AIX64-LD-PER-TARGET - -! AIX64-LD-PER-TARGET-NOT: warning: -! AIX64-LD-PER-TARGET: "-fc1" "-triple" "powerpc64-ibm-aix" -! AIX64-LD-PER-TARGET-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" -! 
AIX64-LD-PER-TARGET: "{{.*}}ld{{(.exe)?}}" -! AIX64-LD-PER-TARGET-NOT: "-bnso" -! AIX64-LD-PER-TARGET-SAME: "-b64" -! AIX64-LD-PER-TARGET-SAME: "-bpT:0x100000000" "-bpD:0x110000000" -! AIX64-LD-PER-TARGET-SAME: "-lc" -! AIX64-LD-PER-TARGET-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}powerpc64-ibm-aix{{/|\\\\}}libflang_rt.runtime.a" -! AIX64-LD-PER-TARGET-SAME: "-lm" -! AIX64-LD-PER-TARGET-SAME: "-lpthread" - -! Check powerpc64le-unknown-linux-gnu 64-bit linking to static flang-rt -! RUN: %flang %s -### 2>&1 \ +! RUN: | FileCheck %s --check-prefix=AIX64-LD-PER-TARGET-DEFAULT + +! AIX64-LD-PER-TARGET-DEFAULT: "-fc1" "-triple" "powerpc64-ibm-aix" +! AIX64-LD-PER-TARGET-DEFAULT-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" +! AIX64-LD-PER-TARGET-DEFAULT: "{{.*}}ld{{(.exe)?}}" +! AIX64-LD-PER-TARGET-DEFAULT-NOT: "-bnso" +! AIX64-LD-PER-TARGET-DEFAULT-SAME: "-b64" +! AIX64-LD-PER-TARGET-DEFAULT-SAME: "-bpT:0x100000000" "-bpD:0x110000000" +! AIX64-LD-PER-TARGET-DEFAULT-SAME: "-lc" +! AIX64-LD-PER-TARGET-DEFAULT-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}powerpc64-ibm-aix{{/|\\\\}}libflang_rt.runtime.a" +! AIX64-LD-PER-TARGET-DEFAULT-SAME: "-lm" +! AIX64-LD-PER-TARGET-DEFAULT-SAME: "-lpthread" + + +! Check powerpc64-ibm-aix 64-bit linking to static flang-rt by option +! RUN: %flang -static-libflangrt -Werror %s -### 2>&1 \ +! RUN: --target=powerpc64-ibm-aix \ +! RUN: -resource-dir=%S/../../../clang/test/Driver/Inputs/resource_dir_with_per_target_subdir \ +! RUN: | FileCheck %s --check-prefix=AIX64-LD-PER-TARGET-STATIC + +! AIX64-LD-PER-TARGET-STATIC: "-fc1" "-triple" "powerpc64-ibm-aix" +! AIX64-LD-PER-TARGET-STATIC-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" +! AIX64-LD-PER-TARGET-STATIC: "{{.*}}ld{{(.exe)?}}" +! AIX64-LD-PER-TARGET-STATIC-NOT: "-bnso" +! AIX64-LD-PER-TARGET-STATIC-SAME: "-b64" +! AIX64-LD-PER-TARGET-STATIC-SAME: "-bpT:0x100000000" "-bpD:0x110000000" +! AIX64-LD-PER-TARGET-STATIC-SAME: "-lc" +! 
AIX64-LD-PER-TARGET-STATIC-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}powerpc64-ibm-aix{{/|\\\\}}libflang_rt.runtime.a" +! AIX64-LD-PER-TARGET-STATIC-SAME: "-lm" +! AIX64-LD-PER-TARGET-STATIC-SAME: "-lpthread" + + +! Check powerpc64-ibm-aix 64-bit linking to shared flang-rt by option +! RUN: %flang -shared-libflangrt -Werror %s -### 2>&1 \ +! RUN: --target=powerpc64-ibm-aix \ +! RUN: -resource-dir=%S/../../../clang/test/Driver/Inputs/resource_dir_with_per_target_subdir \ +! RUN: | FileCheck %s --check-prefix=AIX64-LD-PER-TARGET-SHARED + +! AIX64-LD-PER-TARGET-SHARED: "-fc1" "-triple" "powerpc64-ibm-aix" +! AIX64-LD-PER-TARGET-SHARED-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" +! AIX64-LD-PER-TARGET-SHARED: "{{.*}}ld{{(.exe)?}}" +! AIX64-LD-PER-TARGET-SHARED-NOT: "-bnso" +! AIX64-LD-PER-TARGET-SHARED-SAME: "-b64" +! AIX64-LD-PER-TARGET-SHARED-SAME: "-bpT:0x100000000" "-bpD:0x110000000" +! AIX64-LD-PER-TARGET-SHARED-SAME: "-lc" +! AIX64-LD-PER-TARGET-SHARED-SAME: "-lflang_rt.runtime" +! AIX64-LD-PER-TARGET-SHARED-SAME: "-lm" +! AIX64-LD-PER-TARGET-SHARED-SAME: "-lpthread" + + +! Check powerpc64le-unknown-linux-gnu 64-bit linking to shared flang-rt by default +! RUN: %flang -Werror %s -### 2>&1 \ +! RUN: --target=powerpc64le-unknown-linux-gnu \ +! RUN: -resource-dir=%S/../../../clang/test/Driver/Inputs/resource_dir_with_per_target_subdir \ +! RUN: | FileCheck %s --check-prefixes=LOP64-LD-PER-TARGET-DEFAULT + +! LOP64-LD-PER-TARGET-DEFAULT: "-fc1" "-triple" "powerpc64le-unknown-linux-gnu" +! LOP64-LD-PER-TARGET-DEFAULT-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" +! LOP64-LD-PER-TARGET-DEFAULT: "{{.*}}ld{{(.exe)?}}" +! LOP64-LD-PER-TARGET-DEFAULT-NOT: "-bnso" +! LOP64-LD-PER-TARGET-DEFAULT-SAME: "-lflang_rt.runtime" +! LOP64-LD-PER-TARGET-DEFAULT-SAME: "-lm" +! LOP64-LD-PER-TARGET-DEFAULT-SAME: "-lc" + + +! Check powerpc64le-unknown-linux-gnu 64-bit linking to static flang-rt by option +! RUN: %flang -static-libflangrt -Werror %s -### 2>&1 \ ! 
RUN: --target=powerpc64le-unknown-linux-gnu \ ! RUN: -resource-dir=%S/../../../clang/test/Driver/Inputs/resource_dir_with_per_target_subdir \ -! RUN: | FileCheck %s --check-prefixes=LOP64-LD-PER-TARGET - -! LOP64-LD-PER-TARGET-NOT: warning: -! LOP64-LD-PER-TARGET: "-fc1" "-triple" "powerpc64le-unknown-linux-gnu" -! LOP64-LD-PER-TARGET-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" -! LOP64-LD-PER-TARGET: "{{.*}}ld{{(.exe)?}}" -! LOP64-LD-PER-TARGET-NOT: "-bnso" -! LOP64-LD-PER-TARGET-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}powerpc64le-unknown-linux-gnu{{/|\\\\}}libflang_rt.runtime.a" -! LOP64-LD-PER-TARGET-SAME: "-lm" -! LOP64-LD-PER-TARGET-SAME: "-lc" +! RUN: | FileCheck %s --check-prefixes=LOP64-LD-PER-TARGET-STATIC + +! LOP64-LD-PER-TARGET-STATIC: "-fc1" "-triple" "powerpc64le-unknown-linux-gnu" +! LOP64-LD-PER-TARGET-STATIC-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" +! LOP64-LD-PER-TARGET-STATIC: "{{.*}}ld{{(.exe)?}}" +! LOP64-LD-PER-TARGET-STATIC-NOT: "-bnso" +! LOP64-LD-PER-TARGET-STATIC-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}powerpc64le-unknown-linux-gnu{{/|\\\\}}libflang_rt.runtime.a" +! LOP64-LD-PER-TARGET-STATIC-SAME: "-lm" +! LOP64-LD-PER-TARGET-STATIC-SAME: "-lc" + + +! Check powerpc64le-unknown-linux-gnu 64-bit linking to shared flang-rt by option +! RUN: %flang -shared-libflangrt -Werror %s -### 2>&1 \ +! RUN: --target=powerpc64le-unknown-linux-gnu \ +! RUN: -resource-dir=%S/../../../clang/test/Driver/Inputs/resource_dir_with_per_target_subdir \ +! RUN: | FileCheck %s --check-prefixes=LOP64-LD-PER-TARGET-SHARED + +! LOP64-LD-PER-TARGET-SHARED: "-fc1" "-triple" "powerpc64le-unknown-linux-gnu" +! LOP64-LD-PER-TARGET-SHARED-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" +! LOP64-LD-PER-TARGET-SHARED: "{{.*}}ld{{(.exe)?}}" +! LOP64-LD-PER-TARGET-SHARED-NOT: "-bnso" +! LOP64-LD-PER-TARGET-SHARED-SAME: "-lflang_rt.runtime" +! LOP64-LD-PER-TARGET-SHARED-SAME: "-lm" +! 
LOP64-LD-PER-TARGET-SHARED-SAME: "-lc" diff --git a/flang/test/Driver/linker-flags.f90 b/flang/test/Driver/linker-flags.f90 index 20104276d2e4..ad48ea1b9e9b 100644 --- a/flang/test/Driver/linker-flags.f90 +++ b/flang/test/Driver/linker-flags.f90 @@ -2,15 +2,16 @@ ! invocation. These libraries are added on top of other standard runtime ! libraries that the Clang driver will include. -! RUN: %flang -### --target=ppc64le-linux-gnu %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,UNIX,UNIX-F128NONE -! RUN: %flang -### --target=aarch64-apple-darwin %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,DARWIN,DARWIN-F128%f128-lib +! RUN: %flang -### --target=ppc64le-linux-gnu %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,UNIX,UNIX-F128%f128-lib ! RUN: %flang -### --target=sparc-sun-solaris2.11 %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,UNIX,SOLARIS-F128%f128-lib ! RUN: %flang -### --target=x86_64-unknown-freebsd %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,BSD,BSD-F128%f128-lib ! RUN: %flang -### --target=x86_64-unknown-netbsd %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,BSD,BSD-F128%f128-lib ! RUN: %flang -### --target=x86_64-unknown-openbsd %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,BSD,BSD-F128%f128-lib ! RUN: %flang -### --target=x86_64-unknown-dragonfly %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,BSD,BSD-F128%f128-lib +! RUN: %flang -### --target=aarch64-apple-darwin %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,DARWIN,DARWIN-F128%f128-lib ! RUN: %flang -### --target=x86_64-unknown-haiku %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,HAIKU,HAIKU-F128%f128-lib ! RUN: %flang -### --target=x86_64-windows-gnu %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,MINGW,MINGW-F128%f128-lib + ! 
RUN: %flang -### -rtlib=compiler-rt --target=aarch64-linux-gnu %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,UNIX,COMPILER-RT ! NOTE: Clang's driver library, clangDriver, usually adds 'oldnames' on Windows, @@ -18,6 +19,16 @@ ! additional dependencies. Make sure its not added. ! RUN: %flang -### --target=aarch64-windows-msvc -fuse-ld= %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,MSVC --implicit-check-not oldnames +! RUN: %flang -### --target=ppc64le-linux-gnu -static-libflangrt %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=UNIX-STATIC-FLANGRT +! RUN: %flang -### --target=sparc-sun-solaris2.11 -static-libflangrt %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=UNIX-STATIC-FLANGRT +! RUN: %flang -### --target=x86_64-unknown-freebsd -static-libflangrt %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=BSD-STATIC-FLANGRT +! RUN: %flang -### --target=x86_64-unknown-netbsd -static-libflangrt %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=BSD-STATIC-FLANGRT +! RUN: %flang -### --target=x86_64-unknown-openbsd -static-libflangrt %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=BSD-STATIC-FLANGRT +! RUN: %flang -### --target=x86_64-unknown-dragonfly -static-libflangrt %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,BSD-STATIC-FLANGRT +! RUN: %flang -### --target=aarch64-apple-darwin -static-libflangrt %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=DARWIN-STATIC-FLANGRT +! RUN: %flang -### --target=x86_64-unknown-haiku -static-libflangrt %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=HAIKU-STATIC-FLANGRT +! RUN: %flang -### --target=x86_64-windows-gnu -static-libflangrt %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=MINGW-STATIC-FLANGRT + ! Compiler invocation to generate the object file ! CHECK-LABEL: {{.*}} "-emit-obj" ! CHECK-SAME: "-o" "[[object_file:.*\.o]]" {{.*}}Inputs/hello.f90 @@ -35,6 +46,7 @@ ! 
SOLARIS-F128LIBQUADMATH-SAME: "-lflang_rt.quadmath" "-z" "ignore" "-lquadmath" "-z" "record" ! UNIX-SAME: "-lflang_rt.runtime" "-lm" ! COMPILER-RT: "{{.*}}{{\\|/}}libclang_rt.builtins.a" +! UNIX-STATIC-FLANGRT: "{{.*}}{{\\|/}}libflang_rt.runtime.a" ! BSD-LABEL: "{{.*}}ld{{(\.exe)?}}" ! BSD-SAME: "[[object_file]]" @@ -42,24 +54,28 @@ ! BSD-F128LIBQUADMATH-SAME: "-lflang_rt.quadmath" "--as-needed" "-lquadmath" "--no-as-needed" ! BSD-SAME: -lflang_rt.runtime ! BSD-SAME: -lexecinfo +! BSD-STATIC-FLANGRT: "{{.*}}{{\\|/}}libflang_rt.runtime.a" ! DARWIN-LABEL: "{{.*}}ld{{(\.exe)?}}" ! DARWIN-SAME: "[[object_file]]" ! DARWIN-F128NONE-NOT: libflang_rt.quadmath ! DARWIN-F128LIBQUADMATH-SAME: "-lflang_rt.quadmath" "--as-needed" "-lquadmath" "--no-as-needed" ! DARWIN-SAME: -lflang_rt.runtime +! DARWIN-STATIC-FLANGRT: "{{.*}}{{\\|/}}libclang_rt.runtime_osx.a" ! HAIKU-LABEL: "{{.*}}ld{{(\.exe)?}}" ! HAIKU-SAME: "[[object_file]]" ! HAIKU-F128NONE-NOT: libflang_rt.quadmath ! HAIKU-F128LIBQUADMATH-SAME: "-lflang_rt.quadmath" "--as-needed" "-lquadmath" "--no-as-needed" ! HAIKU-SAME: "-lflang_rt.runtime" +! HAIKU-STATIC-FLANGRT: "{{.*}}{{\\|/}}libflang_rt.runtime.a" ! MINGW-LABEL: "{{.*}}ld{{(\.exe)?}}" ! MINGW-SAME: "[[object_file]]" ! MINGW-F128NONE-NOT: libflang_rt.quadmath ! MINGW-F128LIBQUADMATH-SAME: "-lflang_rt.quadmath" "--as-needed" "-lquadmath" "--no-as-needed" ! MINGW-SAME: -lflang_rt.runtime +! MINGW-STATIC-FLANGRT: "{{.*}}{{\\|/}}libflang_rt.runtime.a" ! NOTE: This also matches lld-link (when CLANG_DEFAULT_LINKER=lld) and ! any .exe suffix that is added when resolving to the full path of diff --git a/flang/test/Driver/omp-driver-offload.f90 b/flang/test/Driver/omp-driver-offload.f90 index 13d605484506..335bfad4b188 100644 --- a/flang/test/Driver/omp-driver-offload.f90 +++ b/flang/test/Driver/omp-driver-offload.f90 @@ -184,7 +184,7 @@ ! RUN: %flang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx900 \ ! 
RUN: --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode %s 2>&1 | \ ! RUN: FileCheck %s --check-prefix=ROCM-DEVICE-LIB -! ROCM-DEVICE-LIB: "-fc1" {{.*}}ocml.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_900.bc" +! ROCM-DEVICE-LIB: "-fc1" {{.*}}ocml.bc"{{.*}}oclc_daz_opt_off.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_900.bc" ! Test -fopenmp-force-usm option without offload ! RUN: %flang -S -### %s -o %t 2>&1 \ diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h index b1d93caab008..622c8efd7938 100644 --- a/libcxx/include/__bit/popcount.h +++ b/libcxx/include/__bit/popcount.h @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO: __builtin_popcountg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can -// refactor this code to exclusively use __builtin_popcountg. 
- #ifndef _LIBCPP___BIT_POPCOUNT_H #define _LIBCPP___BIT_POPCOUNT_H @@ -27,50 +24,10 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned __x) _NOEXCEPT { - return __builtin_popcount(__x); -} - -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long __x) _NOEXCEPT { - return __builtin_popcountl(__x); -} - -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long long __x) _NOEXCEPT { - return __builtin_popcountll(__x); -} - -template <class _Tp> -[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __popcount_impl(_Tp __t) _NOEXCEPT { - if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned int)) { - return std::__libcpp_popcount(static_cast<unsigned int>(__t)); - } else if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned long)) { - return std::__libcpp_popcount(static_cast<unsigned long>(__t)); - } else if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned long long)) { - return std::__libcpp_popcount(static_cast<unsigned long long>(__t)); - } else { -#if _LIBCPP_STD_VER == 11 - return __t != 0 ? std::__libcpp_popcount(static_cast<unsigned long long>(__t)) + - std::__popcount_impl<_Tp>(__t >> numeric_limits<unsigned long long>::digits) - : 0; -#else - int __ret = 0; - while (__t != 0) { - __ret += std::__libcpp_popcount(static_cast<unsigned long long>(__t)); - __t >>= std::numeric_limits<unsigned long long>::digits; - } - return __ret; -#endif - } -} - template <class _Tp> [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __popcount(_Tp __t) _NOEXCEPT { static_assert(is_unsigned<_Tp>::value, "__popcount only works with unsigned types"); -#if __has_builtin(__builtin_popcountg) // TODO (LLVM 21): This can be dropped once we only support Clang >= 19. 
return __builtin_popcountg(__t); -#else - return std::__popcount_impl(__t); -#endif } #if _LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/__stop_token/atomic_unique_lock.h b/libcxx/include/__stop_token/atomic_unique_lock.h index a698260ac7bb..05e8f223167f 100644 --- a/libcxx/include/__stop_token/atomic_unique_lock.h +++ b/libcxx/include/__stop_token/atomic_unique_lock.h @@ -28,7 +28,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD // and LockedBit is the value of State when the lock bit is set, e.g 1 << 2 template <class _State, _State _LockedBit> class _LIBCPP_AVAILABILITY_SYNC __atomic_unique_lock { - static_assert(std::__libcpp_popcount(static_cast<unsigned long long>(_LockedBit)) == 1, + static_assert(std::__popcount(static_cast<unsigned long long>(_LockedBit)) == 1, "LockedBit must be an integer where only one bit is set"); std::atomic<_State>& __state_; diff --git a/libunwind/docs/BuildingLibunwind.rst b/libunwind/docs/BuildingLibunwind.rst index 8b4f1207d4ba..c231587fd502 100644 --- a/libunwind/docs/BuildingLibunwind.rst +++ b/libunwind/docs/BuildingLibunwind.rst @@ -91,7 +91,7 @@ libunwind specific options .. option:: LIBUNWIND_ENABLE_WERROR:BOOL - **Default**: ``ON`` + **Default**: ``OFF`` Compile with -Werror diff --git a/lldb/include/lldb/Symbol/SymbolContext.h b/lldb/include/lldb/Symbol/SymbolContext.h index 8b6317c6f33c..4f8405f1f0db 100644 --- a/lldb/include/lldb/Symbol/SymbolContext.h +++ b/lldb/include/lldb/Symbol/SymbolContext.h @@ -307,6 +307,13 @@ public: SymbolContext &next_frame_sc, Address &inlined_frame_addr) const; + /// If available, will return the function name according to the specified + /// mangling preference. If this object represents an inlined function, + /// returns the name of the inlined function. Returns nullptr if no function + /// name could be determined. 
+ const char *GetPossiblyInlinedFunctionName( + Mangled::NamePreference mangling_preference) const; + // Member variables lldb::TargetSP target_sp; ///< The Target for a given query lldb::ModuleSP module_sp; ///< The Module for a given query diff --git a/lldb/include/lldb/Target/Language.h b/lldb/include/lldb/Target/Language.h index b699a90aff8e..da2c2cc451da 100644 --- a/lldb/include/lldb/Target/Language.h +++ b/lldb/include/lldb/Target/Language.h @@ -268,7 +268,7 @@ public: // the reference has never been assigned virtual bool IsUninitializedReference(ValueObject &valobj); - virtual bool GetFunctionDisplayName(const SymbolContext *sc, + virtual bool GetFunctionDisplayName(const SymbolContext &sc, const ExecutionContext *exe_ctx, FunctionNameRepresentation representation, Stream &s); diff --git a/lldb/source/Core/FormatEntity.cpp b/lldb/source/Core/FormatEntity.cpp index a9370595c11e..23e5999bd80c 100644 --- a/lldb/source/Core/FormatEntity.cpp +++ b/lldb/source/Core/FormatEntity.cpp @@ -1147,19 +1147,6 @@ static void PrettyPrintFunctionNameWithArgs(Stream &out_stream, out_stream.PutChar(')'); } -static void FormatInlinedBlock(Stream &out_stream, Block *block) { - if (!block) - return; - Block *inline_block = block->GetContainingInlinedBlock(); - if (inline_block) { - if (const InlineFunctionInfo *inline_info = - inline_block->GetInlinedFunctionInfo()) { - out_stream.PutCString(" [inlined] "); - inline_info->GetName().Dump(&out_stream); - } - } -} - static VariableListSP GetFunctionVariableList(const SymbolContext &sc) { assert(sc.function); @@ -1170,22 +1157,6 @@ static VariableListSP GetFunctionVariableList(const SymbolContext &sc) { return sc.function->GetBlock(true).GetBlockVariableList(true); } -static char const *GetInlinedFunctionName(const SymbolContext &sc) { - if (!sc.block) - return nullptr; - - const Block *inline_block = sc.block->GetContainingInlinedBlock(); - if (!inline_block) - return nullptr; - - const InlineFunctionInfo *inline_info = - 
inline_block->GetInlinedFunctionInfo(); - if (!inline_info) - return nullptr; - - return inline_info->GetName().AsCString(nullptr); -} - static bool PrintFunctionNameWithArgs(Stream &s, const ExecutionContext *exe_ctx, const SymbolContext &sc) { @@ -1194,16 +1165,11 @@ static bool PrintFunctionNameWithArgs(Stream &s, ExecutionContextScope *exe_scope = exe_ctx ? exe_ctx->GetBestExecutionContextScope() : nullptr; - const char *cstr = sc.function->GetName().AsCString(nullptr); + const char *cstr = + sc.GetPossiblyInlinedFunctionName(Mangled::ePreferDemangled); if (!cstr) return false; - if (const char *inlined_name = GetInlinedFunctionName(sc)) { - s.PutCString(cstr); - s.PutCString(" [inlined] "); - cstr = inlined_name; - } - VariableList args; if (auto variable_list_sp = GetFunctionVariableList(sc)) variable_list_sp->AppendVariablesWithScope(eValueTypeVariableArgument, @@ -1218,6 +1184,40 @@ static bool PrintFunctionNameWithArgs(Stream &s, return true; } +static bool HandleFunctionNameWithArgs(Stream &s,const ExecutionContext *exe_ctx, + const SymbolContext &sc) { + Language *language_plugin = nullptr; + bool language_plugin_handled = false; + StreamString ss; + if (sc.function) + language_plugin = Language::FindPlugin(sc.function->GetLanguage()); + else if (sc.symbol) + language_plugin = Language::FindPlugin(sc.symbol->GetLanguage()); + + if (language_plugin) + language_plugin_handled = language_plugin->GetFunctionDisplayName( + sc, exe_ctx, Language::FunctionNameRepresentation::eNameWithArgs, ss); + + if (language_plugin_handled) { + s << ss.GetString(); + return true; + } + + if (sc.function) + return PrintFunctionNameWithArgs(s, exe_ctx, sc); + + if (!sc.symbol) + return false; + + const char *cstr = sc.symbol->GetName().AsCString(nullptr); + if (!cstr) + return false; + + s.PutCString(cstr); + + return true; +} + bool FormatEntity::FormatStringRef(const llvm::StringRef &format_str, Stream &s, const SymbolContext *sc, const ExecutionContext *exe_ctx, @@ -1719,63 
+1719,24 @@ bool FormatEntity::Format(const Entry &entry, Stream &s, if (language_plugin) language_plugin_handled = language_plugin->GetFunctionDisplayName( - sc, exe_ctx, Language::FunctionNameRepresentation::eName, ss); + *sc, exe_ctx, Language::FunctionNameRepresentation::eName, ss); if (language_plugin_handled) { s << ss.GetString(); return true; - } else { - const char *name = nullptr; - if (sc->function) - name = sc->function->GetName().AsCString(nullptr); - else if (sc->symbol) - name = sc->symbol->GetName().AsCString(nullptr); - - if (name) { - s.PutCString(name); - FormatInlinedBlock(s, sc->block); - return true; - } } - } - return false; - case Entry::Type::FunctionNameNoArgs: { - if (!sc) + const char *name = sc->GetPossiblyInlinedFunctionName( + Mangled::NamePreference::ePreferDemangled); + if (!name) return false; - Language *language_plugin = nullptr; - bool language_plugin_handled = false; - StreamString ss; - if (sc->function) - language_plugin = Language::FindPlugin(sc->function->GetLanguage()); - else if (sc->symbol) - language_plugin = Language::FindPlugin(sc->symbol->GetLanguage()); - - if (language_plugin) - language_plugin_handled = language_plugin->GetFunctionDisplayName( - sc, exe_ctx, Language::FunctionNameRepresentation::eNameWithNoArgs, - ss); + s.PutCString(name); - if (language_plugin_handled) { - s << ss.GetString(); - return true; - } else { - ConstString name; - if (sc->function) - name = sc->function->GetNameNoArguments(); - else if (sc->symbol) - name = sc->symbol->GetNameNoArguments(); - if (name) { - s.PutCString(name.GetCString()); - FormatInlinedBlock(s, sc->block); - return true; - } - } + return true; } - return false; - case Entry::Type::FunctionNameWithArgs: { + case Entry::Type::FunctionNameNoArgs: { if (!sc) return false; @@ -1789,44 +1750,42 @@ bool FormatEntity::Format(const Entry &entry, Stream &s, if (language_plugin) language_plugin_handled = language_plugin->GetFunctionDisplayName( - sc, exe_ctx, 
Language::FunctionNameRepresentation::eNameWithArgs, ss); + *sc, exe_ctx, Language::FunctionNameRepresentation::eNameWithNoArgs, + ss); if (language_plugin_handled) { s << ss.GetString(); return true; } - if (sc->function) - return PrintFunctionNameWithArgs(s, exe_ctx, *sc); - - if (!sc->symbol) + const char *name = sc->GetPossiblyInlinedFunctionName( + Mangled::NamePreference::ePreferDemangledWithoutArguments); + if (!name) return false; - const char *cstr = sc->symbol->GetName().AsCString(nullptr); - if (!cstr) - return false; + s.PutCString(name); - s.PutCString(cstr); return true; } - case Entry::Type::FunctionMangledName: { + case Entry::Type::FunctionNameWithArgs: { if (!sc) return false; - const char *name = nullptr; - if (sc->symbol) - name = - sc->symbol->GetMangled().GetName(Mangled::ePreferMangled).AsCString(); - else if (sc->function) - name = sc->function->GetMangled() - .GetName(Mangled::ePreferMangled) - .AsCString(); + return HandleFunctionNameWithArgs(s, exe_ctx, *sc); + } + case Entry::Type::FunctionMangledName: { + if (!sc) + return false; + + const char *name = sc->GetPossiblyInlinedFunctionName( + Mangled::NamePreference::ePreferMangled); if (!name) return false; + s.PutCString(name); - FormatInlinedBlock(s, sc->block); + return true; } case Entry::Type::FunctionAddrOffset: diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index a6fdf66f13e4..9bd48ec55022 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -1707,22 +1707,6 @@ static VariableListSP GetFunctionVariableList(const SymbolContext &sc) { return sc.function->GetBlock(true).GetBlockVariableList(true); } -static char const *GetInlinedFunctionName(const SymbolContext &sc) { - if (!sc.block) - return nullptr; - - const Block *inline_block = sc.block->GetContainingInlinedBlock(); - if (!inline_block) - return 
nullptr; - - const InlineFunctionInfo *inline_info = - inline_block->GetInlinedFunctionInfo(); - if (!inline_info) - return nullptr; - - return inline_info->GetName().AsCString(nullptr); -} - static bool PrintFunctionNameWithArgs(Stream &s, const ExecutionContext *exe_ctx, const SymbolContext &sc) { @@ -1731,16 +1715,11 @@ static bool PrintFunctionNameWithArgs(Stream &s, ExecutionContextScope *exe_scope = exe_ctx ? exe_ctx->GetBestExecutionContextScope() : nullptr; - const char *cstr = sc.function->GetName().AsCString(nullptr); + const char *cstr = sc.GetPossiblyInlinedFunctionName( + Mangled::NamePreference::ePreferDemangled); if (!cstr) return false; - if (const char *inlined_name = GetInlinedFunctionName(sc)) { - s.PutCString(cstr); - s.PutCString(" [inlined] "); - cstr = inlined_name; - } - VariableList args; if (auto variable_list_sp = GetFunctionVariableList(sc)) variable_list_sp->AppendVariablesWithScope(eValueTypeVariableArgument, @@ -1757,20 +1736,18 @@ static bool PrintFunctionNameWithArgs(Stream &s, } bool CPlusPlusLanguage::GetFunctionDisplayName( - const SymbolContext *sc, const ExecutionContext *exe_ctx, + const SymbolContext &sc, const ExecutionContext *exe_ctx, FunctionNameRepresentation representation, Stream &s) { switch (representation) { case FunctionNameRepresentation::eNameWithArgs: { - assert(sc); - // Print the function name with arguments in it - if (sc->function) - return PrintFunctionNameWithArgs(s, exe_ctx, *sc); + if (sc.function) + return PrintFunctionNameWithArgs(s, exe_ctx, sc); - if (!sc->symbol) + if (!sc.symbol) return false; - const char *cstr = sc->symbol->GetName().AsCString(nullptr); + const char *cstr = sc.symbol->GetName().AsCString(nullptr); if (!cstr) return false; diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h index 623d481bf117..54f5a94388b9 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h +++ 
b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h @@ -138,7 +138,7 @@ public: ConstString GetDemangledFunctionNameWithoutArguments(Mangled mangled) const override; - bool GetFunctionDisplayName(const SymbolContext *sc, + bool GetFunctionDisplayName(const SymbolContext &sc, const ExecutionContext *exe_ctx, FunctionNameRepresentation representation, Stream &s) override; diff --git a/lldb/source/Symbol/SymbolContext.cpp b/lldb/source/Symbol/SymbolContext.cpp index 183947a69436..a9626bbc3777 100644 --- a/lldb/source/Symbol/SymbolContext.cpp +++ b/lldb/source/Symbol/SymbolContext.cpp @@ -872,6 +872,36 @@ const Symbol *SymbolContext::FindBestGlobalDataSymbol(ConstString name, return nullptr; // no error; we just didn't find anything } +char const *SymbolContext::GetPossiblyInlinedFunctionName( + Mangled::NamePreference mangling_preference) const { + const char *name = nullptr; + if (function) + name = function->GetMangled().GetName(mangling_preference).AsCString(); + else if (symbol) + name = symbol->GetMangled().GetName(mangling_preference).AsCString(); + + if (!block) + return name; + + const Block *inline_block = block->GetContainingInlinedBlock(); + if (!inline_block) + return name; + + const InlineFunctionInfo *inline_info = + inline_block->GetInlinedFunctionInfo(); + if (!inline_info) + return name; + + // If we do have an inlined frame name, return that. + if (char const *inline_name = + inline_info->GetMangled().GetName(mangling_preference).AsCString()) + return inline_name; + + // Sometimes an inline frame may not have mangling information, + // but does have a valid name. 
+ return inline_info->GetName().AsCString(); +} + // // SymbolContextSpecifier // diff --git a/lldb/source/Target/Language.cpp b/lldb/source/Target/Language.cpp index a75894ffa4b3..86754c251cd9 100644 --- a/lldb/source/Target/Language.cpp +++ b/lldb/source/Target/Language.cpp @@ -510,7 +510,7 @@ bool Language::IsNilReference(ValueObject &valobj) { return false; } bool Language::IsUninitializedReference(ValueObject &valobj) { return false; } -bool Language::GetFunctionDisplayName(const SymbolContext *sc, +bool Language::GetFunctionDisplayName(const SymbolContext &sc, const ExecutionContext *exe_ctx, FunctionNameRepresentation representation, Stream &s) { diff --git a/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/main.cpp b/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/main.cpp index 7ad72b4880d7..64e2a5b47967 100644 --- a/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/main.cpp +++ b/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/main.cpp @@ -70,8 +70,8 @@ __attribute__((noinline)) void func6(int &sink, int x) { __attribute__((noinline)) void func7(int &sink, int x) { //% self.filecheck("bt", "main.cpp", "-check-prefix=FUNC7-BT") // FUNC7-BT: func7 - // FUNC7-BT-NEXT: [inlined] func8_inlined - // FUNC7-BT-NEXT: [inlined] func9_inlined + // FUNC7-BT-NEXT: func8_inlined + // FUNC7-BT-NEXT: func9_inlined // FUNC7-BT-NEXT: func10 use<int &, int>(sink, x); use<int &, int>(dummy, 0); diff --git a/lldb/test/API/functionalities/tail_call_frames/inlining_and_tail_calls/main.cpp b/lldb/test/API/functionalities/tail_call_frames/inlining_and_tail_calls/main.cpp index 9829e0246fc2..0a7d365d776c 100644 --- a/lldb/test/API/functionalities/tail_call_frames/inlining_and_tail_calls/main.cpp +++ b/lldb/test/API/functionalities/tail_call_frames/inlining_and_tail_calls/main.cpp @@ -1,13 +1,13 @@ volatile int x; +// clang-format off void __attribute__((noinline)) tail_call_sink() { x++; //% self.filecheck("bt", 
"main.cpp", "-check-prefix=TAIL-CALL-SINK") // TAIL-CALL-SINK: frame #0: 0x{{[0-9a-f]+}} a.out`tail_call_sink() at main.cpp:[[@LINE-1]]:4 - // TAIL-CALL-SINK-NEXT: func3{{.*}} [artificial] + // TAIL-CALL-SINK-NEXT: inlinable_function_which_tail_calls() at main.cpp{{.*}} [artificial] // TAIL-CALL-SINK-NEXT: main{{.*}} - - // TODO: The backtrace should include inlinable_function_which_tail_calls. } +// clang-format on void __attribute__((always_inline)) inlinable_function_which_tail_calls() { tail_call_sink(); @@ -17,13 +17,15 @@ void __attribute__((noinline)) func3() { inlinable_function_which_tail_calls(); } +// clang-format off void __attribute__((always_inline)) inline_sink() { x++; //% self.filecheck("bt", "main.cpp", "-check-prefix=INLINE-SINK") - // INLINE-SINK: frame #0: 0x{{[0-9a-f]+}} a.out`func2() [inlined] inline_sink() at main.cpp:[[@LINE-1]]:4 + // INLINE-SINK: frame #0: 0x{{[0-9a-f]+}} a.out`inline_sink() at main.cpp:[[@LINE-1]]:4 // INLINE-SINK-NEXT: func2{{.*}} // INLINE-SINK-NEXT: func1{{.*}} [artificial] // INLINE-SINK-NEXT: main{{.*}} } +// clang-format on void __attribute__((noinline)) func2() { inline_sink(); /* inlined */ } diff --git a/lldb/test/API/python_api/target/read-instructions-flavor/TestTargetReadInstructionsFlavor.py b/lldb/test/API/python_api/target/read-instructions-flavor/TestTargetReadInstructionsFlavor.py index 12805985798d..f488d4f421c9 100644 --- a/lldb/test/API/python_api/target/read-instructions-flavor/TestTargetReadInstructionsFlavor.py +++ b/lldb/test/API/python_api/target/read-instructions-flavor/TestTargetReadInstructionsFlavor.py @@ -7,6 +7,7 @@ from lldbsuite.test.lldbtest import * class TargetReadInstructionsFlavor(TestBase): + @skipIfDarwin @skipIfWindows @skipIf(archs=no_match(["x86_64", "x86", "i386"])) def test_read_instructions_with_flavor(self): diff --git a/lldb/test/Shell/Recognizer/verbose_trap-in-stl-max-depth.test b/lldb/test/Shell/Recognizer/verbose_trap-in-stl-max-depth.test index 
0c3275c571b3..2ea6594643c9 100644 --- a/lldb/test/Shell/Recognizer/verbose_trap-in-stl-max-depth.test +++ b/lldb/test/Shell/Recognizer/verbose_trap-in-stl-max-depth.test @@ -12,5 +12,5 @@ run frame recognizer info 0 # CHECK: frame 0 is recognized by Verbose Trap StackFrame Recognizer frame info -# CHECK: frame #0: {{.*}}`std::recursively_aborts(int) {{.*}} at verbose_trap-in-stl-max-depth.cpp +# CHECK: frame #0: {{.*}}`__clang_trap_msg$Error$max depth at verbose_trap-in-stl-max-depth.cpp q diff --git a/lldb/test/Shell/Settings/TestFrameFormatName.test b/lldb/test/Shell/Settings/TestFrameFormatName.test index caa3242527c6..110daceb47b4 100644 --- a/lldb/test/Shell/Settings/TestFrameFormatName.test +++ b/lldb/test/Shell/Settings/TestFrameFormatName.test @@ -30,7 +30,7 @@ c c # NAME_WITH_ARGS: frame Foo::returns_func_ptr<int>(this={{.*}}, (null)={{.*}}) c -# NAME_WITH_ARGS: frame main [inlined] inlined_foo(str="bar") +# NAME_WITH_ARGS: frame inlined_foo(str="bar") q #--- name.input @@ -38,18 +38,18 @@ q settings set -f frame-format "frame ${function.name}\n" break set -n inlined_foo run -# NAME: frame main [inlined] inlined_foo(char const*) +# NAME: frame inlined_foo(char const*) #--- name_without_args.input # RUN: %lldb -b -s %t/name_without_args.input %t.out | FileCheck %s --check-prefix=NAME_WITHOUT_ARGS settings set -f frame-format "frame ${function.name-without-args}\n" break set -n inlined_foo run -# NAME_WITHOUT_ARGS: frame main [inlined] inlined_foo(char const*) +# NAME_WITHOUT_ARGS: frame inlined_foo #--- mangled_name.input # RUN: %lldb -b -s %t/mangled_name.input %t.out | FileCheck %s --check-prefix=MANGLED_NAME settings set -f frame-format "frame ${function.mangled-name}\n" break set -n inlined_foo run -# MANGLED_NAME: frame main [inlined] inlined_foo(char const*) +# MANGLED_NAME: frame _Z11inlined_fooPKc diff --git a/lldb/test/Shell/SymbolFile/NativePDB/inline_sites_live.cpp b/lldb/test/Shell/SymbolFile/NativePDB/inline_sites_live.cpp index 
906f3d7dff0a..4a06e6350b00 100644 --- a/lldb/test/Shell/SymbolFile/NativePDB/inline_sites_live.cpp +++ b/lldb/test/Shell/SymbolFile/NativePDB/inline_sites_live.cpp @@ -23,11 +23,11 @@ int main(int argc, char** argv) { }
// CHECK: * thread #1, {{.*}}stop reason = breakpoint 1
-// CHECK-NEXT: frame #0: {{.*}}`main [inlined] bar(param=2)
+// CHECK-NEXT: frame #0: {{.*}}`bar(param=2)
// CHECK: (lldb) expression param
// CHECK-NEXT: (int) $0 = 2
// CHECK: * thread #1, {{.*}}stop reason = breakpoint 2
-// CHECK-NEXT: frame #0: {{.*}}`main [inlined] foo(param=1)
+// CHECK-NEXT: frame #0: {{.*}}`foo(param=1)
// CHECK: (lldb) expression param
// CHECK-NEXT: (int) $1 = 1
// CHECK-NEXT: (lldb) expression local
diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake index ffcc718b4777..bf914c379e80 100644 --- a/llvm/cmake/modules/TableGen.cmake +++ b/llvm/cmake/modules/TableGen.cmake @@ -68,7 +68,9 @@ function(tablegen project ofn) # char literals, instead. If we're cross-compiling, then conservatively assume # that the source might be consumed by MSVC. # [1] https://docs.microsoft.com/en-us/cpp/cpp/compiler-limits?view=vs-2017 - if (MSVC AND project STREQUAL LLVM) + # Don't pass this flag to mlir-src-sharder, since it doesn't support the + # flag, and it doesn't need it. + if (MSVC AND NOT "${project}" STREQUAL "MLIR_SRC_SHARDER") list(APPEND LLVM_TABLEGEN_FLAGS "--long-string-literals=0") endif() if (CMAKE_GENERATOR MATCHES "Visual Studio") diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h index 25fc8a00f7dd..01e51accec9d 100644 --- a/llvm/include/llvm/ADT/APFloat.h +++ b/llvm/include/llvm/ADT/APFloat.h @@ -354,6 +354,7 @@ struct APFloatBase { static bool semanticsHasInf(const fltSemantics &); static bool semanticsHasNaN(const fltSemantics &); static bool isIEEELikeFP(const fltSemantics &); + static bool hasSignBitInMSB(const fltSemantics &); // Returns true if any number described by \p Src can be precisely represented // by a normal (not subnormal) value in \p Dst. diff --git a/llvm/include/llvm/CodeGen/BranchRelaxation.h b/llvm/include/llvm/CodeGen/BranchRelaxation.h new file mode 100644 index 000000000000..2007cf05b3aa --- /dev/null +++ b/llvm/include/llvm/CodeGen/BranchRelaxation.h @@ -0,0 +1,25 @@ +//===- llvm/CodeGen/BranchRelaxation.h --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_BRANCHRELAXATION_H +#define LLVM_CODEGEN_BRANCHRELAXATION_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class BranchRelaxationPass : public PassInfoMixin<BranchRelaxationPass> { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); + static bool isRequired() { return true; } +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_BRANCHRELAXATION_H diff --git a/llvm/include/llvm/CodeGen/RemoveLoadsIntoFakeUses.h b/llvm/include/llvm/CodeGen/RemoveLoadsIntoFakeUses.h new file mode 100644 index 000000000000..bbd5b8b430bf --- /dev/null +++ b/llvm/include/llvm/CodeGen/RemoveLoadsIntoFakeUses.h @@ -0,0 +1,30 @@ +//===- llvm/CodeGen/RemoveLoadsIntoFakeUses.h -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_REMOVELOADSINTOFAKEUSES_H +#define LLVM_CODEGEN_REMOVELOADSINTOFAKEUSES_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class RemoveLoadsIntoFakeUsesPass + : public PassInfoMixin<RemoveLoadsIntoFakeUsesPass> { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); + + MachineFunctionProperties getRequiredProperties() const { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_REMOVELOADSINTOFAKEUSES_H diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h index 6180c53a9a94..ab3eaa92548c 100644 --- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -467,10 +467,7 @@ public: /// Returns true if Reg contains RegUnit. bool hasRegUnit(MCRegister Reg, MCRegUnit RegUnit) const { - for (MCRegUnit Unit : regunits(Reg)) - if (Unit == RegUnit) - return true; - return false; + return llvm::is_contained(regunits(Reg), RegUnit); } /// Returns the original SrcReg unless it is the target of a copy-like diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVDWARFReader.h b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVDWARFReader.h index fdc97249d8e5..aa47bd9cd2cd 100644 --- a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVDWARFReader.h +++ b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVDWARFReader.h @@ -102,11 +102,7 @@ class LVDWARFReader final : public LVBinaryReader { } // Remove offset from global map. 
- void removeGlobalOffset(LVOffset Offset) { - LVOffsetElementMap::iterator Iter = GlobalOffsets.find(Offset); - if (Iter != GlobalOffsets.end()) - GlobalOffsets.erase(Iter); - } + void removeGlobalOffset(LVOffset Offset) { GlobalOffsets.erase(Offset); } // Get the location information for DW_AT_data_member_location. void processLocationMember(dwarf::Attribute Attr, diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 9fd5e7676b19..3242ccff7f87 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -61,7 +61,7 @@ void initializeBasicAAWrapperPassPass(PassRegistry &); void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry &); void initializeBranchFolderLegacyPass(PassRegistry &); void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry &); -void initializeBranchRelaxationPass(PassRegistry &); +void initializeBranchRelaxationLegacyPass(PassRegistry &); void initializeBreakCriticalEdgesPass(PassRegistry &); void initializeBreakFalseDepsPass(PassRegistry &); void initializeCanonicalizeFreezeInLoopsPass(PassRegistry &); @@ -267,7 +267,7 @@ void initializeRegionOnlyViewerPass(PassRegistry &); void initializeRegionPrinterPass(PassRegistry &); void initializeRegionViewerPass(PassRegistry &); void initializeRegisterCoalescerLegacyPass(PassRegistry &); -void initializeRemoveLoadsIntoFakeUsesPass(PassRegistry &); +void initializeRemoveLoadsIntoFakeUsesLegacyPass(PassRegistry &); void initializeRemoveRedundantDebugValuesLegacyPass(PassRegistry &); void initializeRenameIndependentSubregsLegacyPass(PassRegistry &); void initializeReplaceWithVeclibLegacyPass(PassRegistry &); diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h index 5953de30c2eb..10eabd41e80f 100644 --- a/llvm/include/llvm/MC/MCAsmBackend.h +++ b/llvm/include/llvm/MC/MCAsmBackend.h @@ -109,11 +109,10 @@ public: return false; } - virtual bool evaluateTargetFixup(const MCAssembler 
&Asm, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, - const MCSubtargetInfo *STI, uint64_t &Value, - bool &WasForced) { + virtual bool evaluateTargetFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCFragment *DF, const MCValue &Target, + const MCSubtargetInfo *STI, + uint64_t &Value) { llvm_unreachable("Need to implement hook if target has custom fixups"); } @@ -153,11 +152,9 @@ public: /// Target specific predicate for whether a given fixup requires the /// associated instruction to be relaxed. - virtual bool fixupNeedsRelaxationAdvanced(const MCAssembler &Asm, - const MCFixup &Fixup, bool Resolved, - uint64_t Value, - const MCRelaxableFragment *DF, - const bool WasForced) const; + virtual bool fixupNeedsRelaxationAdvanced(const MCAssembler &, + const MCFixup &, const MCValue &, + uint64_t, bool Resolved) const; /// Simple predicate for targets where !Resolved implies requiring relaxation virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, @@ -225,11 +222,6 @@ public: return 0; } - /// Check whether a given symbol has been flagged with MICROMIPS flag. - virtual bool isMicroMips(const MCSymbol *Sym) const { - return false; - } - bool isDarwinCanonicalPersonality(const MCSymbol *Sym) const; }; diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h index a68eb49fda28..57143e3d59b4 100644 --- a/llvm/include/llvm/MC/MCAssembler.h +++ b/llvm/include/llvm/MC/MCAssembler.h @@ -95,14 +95,11 @@ private: /// evaluates to. /// \param Value [out] On return, the value of the fixup as currently laid /// out. - /// \param WasForced [out] On return, the value in the fixup is set to the - /// correct value if WasForced is true, even if evaluateFixup returns false. - /// \return Whether the fixup value was fully resolved. This is true if the - /// \p Value result is fixed, otherwise the value may change due to + /// \param RecordReloc Record relocation if needed. /// relocation. 
bool evaluateFixup(const MCFixup &Fixup, const MCFragment *DF, MCValue &Target, const MCSubtargetInfo *STI, - uint64_t &Value, bool &WasForced) const; + uint64_t &Value, bool RecordReloc) const; /// Check whether a fixup can be satisfied, or whether it needs to be relaxed /// (increased in size, in order to hold its value correctly). @@ -127,9 +124,6 @@ private: bool relaxCVDefRange(MCCVDefRangeFragment &DF); bool relaxPseudoProbeAddr(MCPseudoProbeAddrFragment &DF); - std::tuple<MCValue, uint64_t, bool> - handleFixup(MCFragment &F, const MCFixup &Fixup, const MCSubtargetInfo *STI); - public: /// Construct a new assembler instance. // diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 25ca982916ff..6e2c2683730c 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -74,6 +74,7 @@ #include "llvm/CodeGen/RegUsageInfoPropagate.h" #include "llvm/CodeGen/RegisterCoalescerPass.h" #include "llvm/CodeGen/RegisterUsageInfo.h" +#include "llvm/CodeGen/RemoveLoadsIntoFakeUses.h" #include "llvm/CodeGen/RemoveRedundantDebugValues.h" #include "llvm/CodeGen/RenameIndependentSubregs.h" #include "llvm/CodeGen/ReplaceWithVeclib.h" @@ -1003,6 +1004,7 @@ Error CodeGenPassBuilder<Derived, TargetMachineT>::addMachinePasses( addPass(FuncletLayoutPass()); + addPass(RemoveLoadsIntoFakeUsesPass()); addPass(StackMapLivenessPass()); addPass(LiveDebugValuesPass( getTM<TargetMachine>().Options.ShouldEmitDebugEntryValues())); diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 232d5506f5b3..94febae16eee 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -137,6 +137,7 @@ MACHINE_FUNCTION_ANALYSIS("virtregmap", VirtRegMapAnalysis()) #ifndef MACHINE_FUNCTION_PASS #define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS) #endif 
+MACHINE_FUNCTION_PASS("branch-relaxation", BranchRelaxationPass()) MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass()) MACHINE_FUNCTION_PASS("detect-dead-lanes", DetectDeadLanesPass()) MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass()) @@ -181,6 +182,7 @@ MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass()) MACHINE_FUNCTION_PASS("reg-usage-propagation", RegUsageInfoPropagationPass()) MACHINE_FUNCTION_PASS("register-coalescer", RegisterCoalescerPass()) MACHINE_FUNCTION_PASS("rename-independent-subregs", RenameIndependentSubregsPass()) +MACHINE_FUNCTION_PASS("remove-loads-into-fake-uses", RemoveLoadsIntoFakeUsesPass()) MACHINE_FUNCTION_PASS("remove-redundant-debug-values", RemoveRedundantDebugValuesPass()) MACHINE_FUNCTION_PASS("require-all-machine-function-properties", RequireAllMachineFunctionPropertiesPass()) @@ -310,7 +312,6 @@ DUMMY_MACHINE_FUNCTION_PASS("ra-pbqp", RAPBQPPass) DUMMY_MACHINE_FUNCTION_PASS("regalloc", RegAllocPass) DUMMY_MACHINE_FUNCTION_PASS("regallocscoringpass", RegAllocScoringPass) DUMMY_MACHINE_FUNCTION_PASS("regbankselect", RegBankSelectPass) -DUMMY_MACHINE_FUNCTION_PASS("remove-loads-into-fake-uses", RemoveLoadsIntoFakeUsesPass) DUMMY_MACHINE_FUNCTION_PASS("reset-machine-function", ResetMachineFunctionPass) DUMMY_MACHINE_FUNCTION_PASS("shrink-wrap", ShrinkWrapPass) DUMMY_MACHINE_FUNCTION_PASS("stack-frame-layout", StackFrameLayoutAnalysisPass) diff --git a/llvm/include/llvm/TableGen/Main.h b/llvm/include/llvm/TableGen/Main.h index e8c60e286990..5f68be188de7 100644 --- a/llvm/include/llvm/TableGen/Main.h +++ b/llvm/include/llvm/TableGen/Main.h @@ -13,6 +13,7 @@ #ifndef LLVM_TABLEGEN_MAIN_H #define LLVM_TABLEGEN_MAIN_H +#include "llvm/Support/CommandLine.h" #include <functional> namespace llvm { @@ -27,6 +28,10 @@ using TableGenMainFn = bool(raw_ostream &OS, const RecordKeeper &Records); int TableGenMain(const char *argv0, std::function<TableGenMainFn> MainFn = nullptr); +/// Controls 
emitting large character arrays as strings or character arrays. +/// Typically set to false when building with MSVC. +extern cl::opt<bool> EmitLongStrLiterals; + } // end namespace llvm #endif // LLVM_TABLEGEN_MAIN_H diff --git a/llvm/include/llvm/TableGen/StringToOffsetTable.h b/llvm/include/llvm/TableGen/StringToOffsetTable.h index e716411514bd..21795644d4bd 100644 --- a/llvm/include/llvm/TableGen/StringToOffsetTable.h +++ b/llvm/include/llvm/TableGen/StringToOffsetTable.h @@ -12,8 +12,6 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" -#include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/raw_ostream.h" #include <optional> namespace llvm { @@ -36,17 +34,7 @@ public: bool empty() const { return StringOffset.empty(); } size_t size() const { return AggregateString.size(); } - unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true) { - auto [II, Inserted] = StringOffset.insert({Str, size()}); - if (Inserted) { - // Add the string to the aggregate if this is the first time found. - AggregateString.append(Str.begin(), Str.end()); - if (appendZero) - AggregateString += '\0'; - } - - return II->second; - } + unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true); // Returns the offset of `Str` in the table if its preset, else return // std::nullopt. @@ -69,96 +57,10 @@ public: // `static` and `constexpr`. Both `Name` and (`Name` + "Storage") must be // valid identifiers to declare. void EmitStringTableDef(raw_ostream &OS, const Twine &Name, - const Twine &Indent = "") const { - OS << formatv(R"( -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Woverlength-strings" -#endif -{0}static constexpr char {1}Storage[] = )", - Indent, Name); - - // MSVC silently miscompiles string literals longer than 64k in some - // circumstances. When the string table is longer, emit it as an array of - // character literals. 
- bool UseChars = AggregateString.size() > (64 * 1024); - OS << (UseChars ? "{\n" : "\n"); - - llvm::ListSeparator LineSep(UseChars ? ",\n" : "\n"); - llvm::SmallVector<StringRef> Strings(split(AggregateString, '\0')); - // We should always have an empty string at the start, and because these are - // null terminators rather than separators, we'll have one at the end as - // well. Skip the end one. - assert(Strings.front().empty() && "Expected empty initial string!"); - assert(Strings.back().empty() && - "Expected empty string at the end due to terminators!"); - Strings.pop_back(); - for (StringRef Str : Strings) { - OS << LineSep << Indent << " "; - // If we can, just emit this as a string literal to be concatenated. - if (!UseChars) { - OS << "\""; - OS.write_escaped(Str); - OS << "\\0\""; - continue; - } - - llvm::ListSeparator CharSep(", "); - for (char C : Str) { - OS << CharSep << "'"; - OS.write_escaped(StringRef(&C, 1)); - OS << "'"; - } - OS << CharSep << "'\\0'"; - } - OS << LineSep << Indent << (UseChars ? "};" : " ;"); - - OS << formatv(R"( -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif - -{0}static constexpr llvm::StringTable {1} = -{0} {1}Storage; -)", - Indent, Name); - } + const Twine &Indent = "") const; // Emit the string as one single string. - void EmitString(raw_ostream &O) const { - // Escape the string. - SmallString<256> EscapedStr; - raw_svector_ostream(EscapedStr).write_escaped(AggregateString); - - O << " \""; - unsigned CharsPrinted = 0; - for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) { - if (CharsPrinted > 70) { - O << "\"\n \""; - CharsPrinted = 0; - } - O << EscapedStr[i]; - ++CharsPrinted; - - // Print escape sequences all together. 
- if (EscapedStr[i] != '\\') - continue; - - assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!"); - if (isDigit(EscapedStr[i + 1])) { - assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) && - "Expected 3 digit octal escape!"); - O << EscapedStr[++i]; - O << EscapedStr[++i]; - O << EscapedStr[++i]; - CharsPrinted += 3; - } else { - O << EscapedStr[++i]; - ++CharsPrinted; - } - } - O << "\""; - } + void EmitString(raw_ostream &O) const; }; } // end namespace llvm diff --git a/llvm/lib/Analysis/PHITransAddr.cpp b/llvm/lib/Analysis/PHITransAddr.cpp index e42113db4278..276708c2ebf7 100644 --- a/llvm/lib/Analysis/PHITransAddr.cpp +++ b/llvm/lib/Analysis/PHITransAddr.cpp @@ -224,6 +224,9 @@ Value *PHITransAddr::translateSubExpr(Value *V, BasicBlock *CurBB, // Scan to see if we have this GEP available. Value *APHIOp = GEPOps[0]; + if (isa<ConstantData>(APHIOp)) + return nullptr; + for (User *U : APHIOp->users()) { if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) if (GEPI->getType() == GEP->getType() && diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index c62ea1526981..d193c9e3210e 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -7841,7 +7841,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { unsigned GCD = std::min(MulZeros, TZ); APInt DivAmt = APInt::getOneBitSet(BitWidth, TZ - GCD); SmallVector<const SCEV*, 4> MulOps; - MulOps.push_back(getConstant(OpC->getAPInt().lshr(GCD))); + MulOps.push_back(getConstant(OpC->getAPInt().ashr(GCD))); append_range(MulOps, LHSMul->operands().drop_front()); auto *NewMul = getMulExpr(MulOps, LHSMul->getNoWrapFlags()); ShiftedLHS = getUDivExpr(NewMul, getConstant(DivAmt)); diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index 062283975851..55d1350e446a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ 
b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -162,8 +162,7 @@ void WinException::endFunction(const MachineFunction *MF) { if (!MF->getEHContTargets().empty()) { // Copy the function's EH Continuation targets to a module-level list. - EHContTargets.insert(EHContTargets.end(), MF->getEHContTargets().begin(), - MF->getEHContTargets().end()); + llvm::append_range(EHContTargets, MF->getEHContTargets()); } } @@ -292,8 +291,7 @@ void WinException::endFuncletImpl() { if (!MF->getEHContTargets().empty()) { // Copy the function's EH Continuation targets to a module-level list. - EHContTargets.insert(EHContTargets.end(), MF->getEHContTargets().begin(), - MF->getEHContTargets().end()); + llvm::append_range(EHContTargets, MF->getEHContTargets()); } // Switch back to the funclet start .text section now that we are done diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp index a762aab43ddd..fbdc784c928c 100644 --- a/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/BranchRelaxation.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LivePhysRegs.h" @@ -44,7 +45,7 @@ STATISTIC(NumUnconditionalRelaxed, "Number of unconditional branches relaxed"); namespace { -class BranchRelaxation : public MachineFunctionPass { +class BranchRelaxation { /// BasicBlockInfo - Information about the offset and size of a single /// basic block. 
struct BasicBlockInfo { @@ -116,22 +117,30 @@ class BranchRelaxation : public MachineFunctionPass { void verify(); public: + bool run(MachineFunction &MF); +}; + +class BranchRelaxationLegacy : public MachineFunctionPass { +public: static char ID; - BranchRelaxation() : MachineFunctionPass(ID) {} + BranchRelaxationLegacy() : MachineFunctionPass(ID) {} - bool runOnMachineFunction(MachineFunction &MF) override; + bool runOnMachineFunction(MachineFunction &MF) override { + return BranchRelaxation().run(MF); + } StringRef getPassName() const override { return BRANCH_RELAX_NAME; } }; } // end anonymous namespace -char BranchRelaxation::ID = 0; +char BranchRelaxationLegacy::ID = 0; -char &llvm::BranchRelaxationPassID = BranchRelaxation::ID; +char &llvm::BranchRelaxationPassID = BranchRelaxationLegacy::ID; -INITIALIZE_PASS(BranchRelaxation, DEBUG_TYPE, BRANCH_RELAX_NAME, false, false) +INITIALIZE_PASS(BranchRelaxationLegacy, DEBUG_TYPE, BRANCH_RELAX_NAME, false, + false) /// verify - check BBOffsets, BBSizes, alignment of islands void BranchRelaxation::verify() { @@ -744,7 +753,16 @@ bool BranchRelaxation::relaxBranchInstructions() { return Changed; } -bool BranchRelaxation::runOnMachineFunction(MachineFunction &mf) { +PreservedAnalyses +BranchRelaxationPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + if (!BranchRelaxation().run(MF)) + return PreservedAnalyses::all(); + + return getMachineFunctionPassPreservedAnalyses(); +} + +bool BranchRelaxation::run(MachineFunction &mf) { MF = &mf; LLVM_DEBUG(dbgs() << "***** BranchRelaxation *****\n"); diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 8b777ed2bbc9..b77cefca00b7 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -23,7 +23,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeBasicBlockPathCloningPass(Registry); initializeBasicBlockSectionsPass(Registry); initializeBranchFolderLegacyPass(Registry); - 
initializeBranchRelaxationPass(Registry); + initializeBranchRelaxationLegacyPass(Registry); initializeBreakFalseDepsPass(Registry); initializeCallBrPreparePass(Registry); initializeCFGuardLongjmpPass(Registry); @@ -117,7 +117,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeRegUsageInfoCollectorLegacyPass(Registry); initializeRegUsageInfoPropagationLegacyPass(Registry); initializeRegisterCoalescerLegacyPass(Registry); - initializeRemoveLoadsIntoFakeUsesPass(Registry); + initializeRemoveLoadsIntoFakeUsesLegacyPass(Registry); initializeRemoveRedundantDebugValuesLegacyPass(Registry); initializeRenameIndependentSubregsLegacyPass(Registry); initializeSafeStackLegacyPassPass(Registry); diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp index 4cd378f9aa59..a83982de14b2 100644 --- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp +++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp @@ -2332,8 +2332,9 @@ void ComplexDeinterleavingGraph::replaceNodes() { } else if (RootNode->Operation == ComplexDeinterleavingOperation::ReductionSingle) { auto *RootInst = cast<Instruction>(RootNode->Real); - ReductionInfo[RootInst].first->removeIncomingValue(BackEdge); - DeadInstrRoots.push_back(ReductionInfo[RootInst].second); + auto &Info = ReductionInfo[RootInst]; + Info.first->removeIncomingValue(BackEdge); + DeadInstrRoots.push_back(Info.second); } else { assert(R && "Unable to find replacement for RootInstruction"); DeadInstrRoots.push_back(RootInstruction); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index ac68eb55a6fd..ee271234d311 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1765,7 +1765,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor); buildLCMMergePieces(LeftoverTy, 
NarrowTy, GCDTy, WidenedXors, /* PadStrategy = */ TargetOpcode::G_ZEXT); - Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end()); + llvm::append_range(Xors, WidenedXors); } // Now, for each part we broke up, we know if they are equal/not equal diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 1ac1a770ae72..df3dd4196548 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -1487,8 +1487,7 @@ void MachineLICMImpl::InitializeLoadsHoistableLoops() { auto *L = Worklist.pop_back_val(); AllowedToHoistLoads[L] = true; LoopsInPreOrder.push_back(L); - Worklist.insert(Worklist.end(), L->getSubLoops().begin(), - L->getSubLoops().end()); + llvm::append_range(Worklist, L->getSubLoops()); } // Going from the innermost to outermost loops, check if a loop has diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index dbd354f2ca2c..c27435aa2dae 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -3814,8 +3814,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { // into an existing tracking collection, or insert a new one. 
RegIt = RegToPHIIdx.find(CP.getDstReg()); if (RegIt != RegToPHIIdx.end()) - RegIt->second.insert(RegIt->second.end(), InstrNums.begin(), - InstrNums.end()); + llvm::append_range(RegIt->second, InstrNums); else RegToPHIIdx.insert({CP.getDstReg(), InstrNums}); } diff --git a/llvm/lib/CodeGen/RemoveLoadsIntoFakeUses.cpp b/llvm/lib/CodeGen/RemoveLoadsIntoFakeUses.cpp index 384a049acfe3..042fc13090ef 100644 --- a/llvm/lib/CodeGen/RemoveLoadsIntoFakeUses.cpp +++ b/llvm/lib/CodeGen/RemoveLoadsIntoFakeUses.cpp @@ -22,11 +22,13 @@ /// //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/RemoveLoadsIntoFakeUses.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachinePassManager.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Function.h" @@ -41,12 +43,13 @@ using namespace llvm; STATISTIC(NumLoadsDeleted, "Number of dead load instructions deleted"); STATISTIC(NumFakeUsesDeleted, "Number of FAKE_USE instructions deleted"); -class RemoveLoadsIntoFakeUses : public MachineFunctionPass { +class RemoveLoadsIntoFakeUsesLegacy : public MachineFunctionPass { public: static char ID; - RemoveLoadsIntoFakeUses() : MachineFunctionPass(ID) { - initializeRemoveLoadsIntoFakeUsesPass(*PassRegistry::getPassRegistry()); + RemoveLoadsIntoFakeUsesLegacy() : MachineFunctionPass(ID) { + initializeRemoveLoadsIntoFakeUsesLegacyPass( + *PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -66,21 +69,45 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; }; -char RemoveLoadsIntoFakeUses::ID = 0; -char &llvm::RemoveLoadsIntoFakeUsesID = RemoveLoadsIntoFakeUses::ID; +struct RemoveLoadsIntoFakeUses { + bool run(MachineFunction &MF); +}; + +char 
RemoveLoadsIntoFakeUsesLegacy::ID = 0; +char &llvm::RemoveLoadsIntoFakeUsesID = RemoveLoadsIntoFakeUsesLegacy::ID; -INITIALIZE_PASS_BEGIN(RemoveLoadsIntoFakeUses, DEBUG_TYPE, +INITIALIZE_PASS_BEGIN(RemoveLoadsIntoFakeUsesLegacy, DEBUG_TYPE, "Remove Loads Into Fake Uses", false, false) -INITIALIZE_PASS_END(RemoveLoadsIntoFakeUses, DEBUG_TYPE, +INITIALIZE_PASS_END(RemoveLoadsIntoFakeUsesLegacy, DEBUG_TYPE, "Remove Loads Into Fake Uses", false, false) -bool RemoveLoadsIntoFakeUses::runOnMachineFunction(MachineFunction &MF) { +bool RemoveLoadsIntoFakeUsesLegacy::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + return RemoveLoadsIntoFakeUses().run(MF); +} + +PreservedAnalyses +RemoveLoadsIntoFakeUsesPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + MFPropsModifier _(*this, MF); + + if (!RemoveLoadsIntoFakeUses().run(MF)) + return PreservedAnalyses::all(); + + auto PA = getMachineFunctionPassPreservedAnalyses(); + PA.preserveSet<CFGAnalyses>(); + return PA; +} + +bool RemoveLoadsIntoFakeUses::run(MachineFunction &MF) { // Skip this pass if we would use VarLoc-based LDV, as there may be DBG_VALUE // instructions of the restored values that would become invalid. if (!MF.useDebugInstrRef()) return false; // Only run this for functions that have fake uses. - if (!MF.hasFakeUses() || skipFunction(MF.getFunction())) + if (!MF.hasFakeUses()) return false; bool AnyChanges = false; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 5210372dd935..83fade45d189 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -530,7 +530,7 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { NewOps.push_back(Op); } else if (Op != OrigOp) { // This is the first operand to change - add all operands so far. 
- NewOps.insert(NewOps.end(), N->op_begin(), N->op_begin() + i); + llvm::append_range(NewOps, N->ops().take_front(i)); NewOps.push_back(Op); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 083b984444bc..598de6b20775 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2320,7 +2320,7 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, SelOps.size()); Flags.setMemConstraint(ConstraintID); Handles.emplace_back(CurDAG->getTargetConstant(Flags, DL, MVT::i32)); - Handles.insert(Handles.end(), SelOps.begin(), SelOps.end()); + llvm::append_range(Handles, SelOps); i += 2; } } diff --git a/llvm/lib/CodeGen/WindowScheduler.cpp b/llvm/lib/CodeGen/WindowScheduler.cpp index 95c86a9ac266..2492dfc3ca55 100644 --- a/llvm/lib/CodeGen/WindowScheduler.cpp +++ b/llvm/lib/CodeGen/WindowScheduler.cpp @@ -679,8 +679,7 @@ MachineInstr *WindowScheduler::getOriMI(MachineInstr *NewMI) { } unsigned WindowScheduler::getOriStage(MachineInstr *OriMI, unsigned Offset) { - assert(llvm::find(OriMIs, OriMI) != OriMIs.end() && - "Cannot find OriMI in OriMIs!"); + assert(llvm::is_contained(OriMIs, OriMI) && "Cannot find OriMI in OriMIs!"); // If there is no instruction fold, all MI stages are 0. 
if (Offset == SchedPhiNum) return 0; diff --git a/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp index 80b7452a0b22..15e583ca7685 100644 --- a/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp +++ b/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp @@ -475,8 +475,9 @@ Error DebugObjectManagerPlugin::notifyEmitted( FinalizePromise.set_value(MR.withResourceKeyDo([&](ResourceKey K) { assert(PendingObjs.count(&MR) && "We still hold PendingObjsLock"); std::lock_guard<std::mutex> Lock(RegisteredObjsLock); - RegisteredObjs[K].push_back(std::move(PendingObjs[&MR])); - PendingObjs.erase(&MR); + auto It = PendingObjs.find(&MR); + RegisteredObjs[K].push_back(std::move(It->second)); + PendingObjs.erase(It); })); }); diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/ObjectFormats.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/ObjectFormats.cpp index 3bd6c1e5be2c..d95f2f602fbc 100644 --- a/llvm/lib/ExecutionEngine/Orc/Shared/ObjectFormats.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Shared/ObjectFormats.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/Orc/Shared/ObjectFormats.h" +#include "llvm/ADT/STLExtras.h" namespace llvm { namespace orc { @@ -34,10 +35,7 @@ StringRef ELFThreadBSSSectionName = ".tbss"; StringRef ELFThreadDataSectionName = ".tdata"; bool isMachOInitializerSection(StringRef QualifiedName) { - for (auto &InitSection : MachOInitSectionNames) - if (InitSection == QualifiedName) - return true; - return false; + return llvm::is_contained(MachOInitSectionNames, QualifiedName); } bool isELFInitializerSection(StringRef SecName) { diff --git a/llvm/lib/MC/MCAsmBackend.cpp b/llvm/lib/MC/MCAsmBackend.cpp index 23cc134f65b5..85a208ffbce0 100644 --- a/llvm/lib/MC/MCAsmBackend.cpp +++ b/llvm/lib/MC/MCAsmBackend.cpp @@ -115,11 +115,10 @@ bool MCAsmBackend::shouldForceRelocation(const MCAssembler &, const MCFixup &, 
return Target.getSpecifier(); } -bool MCAsmBackend::fixupNeedsRelaxationAdvanced(const MCAssembler &Asm, +bool MCAsmBackend::fixupNeedsRelaxationAdvanced(const MCAssembler &, const MCFixup &Fixup, - bool Resolved, uint64_t Value, - const MCRelaxableFragment *DF, - const bool WasForced) const { + const MCValue &, uint64_t Value, + bool Resolved) const { if (!Resolved) return true; return fixupNeedsRelaxation(Fixup, Value); diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp index 934bdb40d530..4e925809d20b 100644 --- a/llvm/lib/MC/MCAssembler.cpp +++ b/llvm/lib/MC/MCAssembler.cpp @@ -138,7 +138,7 @@ bool MCAssembler::isThumbFunc(const MCSymbol *Symbol) const { bool MCAssembler::evaluateFixup(const MCFixup &Fixup, const MCFragment *DF, MCValue &Target, const MCSubtargetInfo *STI, - uint64_t &Value, bool &WasForced) const { + uint64_t &Value, bool RecordReloc) const { ++stats::evaluateFixup; // FIXME: This code has some duplication with recordRelocation. We should @@ -150,47 +150,51 @@ bool MCAssembler::evaluateFixup(const MCFixup &Fixup, const MCFragment *DF, const MCExpr *Expr = Fixup.getValue(); MCContext &Ctx = getContext(); Value = 0; - WasForced = false; if (!Expr->evaluateAsRelocatable(Target, this)) { Ctx.reportError(Fixup.getLoc(), "expected relocatable expression"); return true; } - unsigned FixupFlags = getBackend().getFixupKindInfo(Fixup.getKind()).Flags; - if (FixupFlags & MCFixupKindInfo::FKF_IsTarget) - return getBackend().evaluateTargetFixup(*this, Fixup, DF, Target, STI, - Value, WasForced); - - const MCSymbol *Add = Target.getAddSym(); - const MCSymbol *Sub = Target.getSubSym(); - bool IsPCRel = FixupFlags & MCFixupKindInfo::FKF_IsPCRel; bool IsResolved = false; - if (!IsPCRel) { - IsResolved = Target.isAbsolute(); - } else if (Add && !Sub && !Add->isUndefined() && !Add->isAbsolute()) { - IsResolved = (FixupFlags & MCFixupKindInfo::FKF_Constant) || - getWriter().isSymbolRefDifferenceFullyResolvedImpl( - *this, *Add, *DF, false, 
true); + unsigned FixupFlags = getBackend().getFixupKindInfo(Fixup.getKind()).Flags; + if (FixupFlags & MCFixupKindInfo::FKF_IsTarget) { + IsResolved = + getBackend().evaluateTargetFixup(*this, Fixup, DF, Target, STI, Value); + } else { + const MCSymbol *Add = Target.getAddSym(); + const MCSymbol *Sub = Target.getSubSym(); + Value = Target.getConstant(); + if (Add && Add->isDefined()) + Value += getSymbolOffset(*Add); + if (Sub && Sub->isDefined()) + Value -= getSymbolOffset(*Sub); + + bool IsPCRel = FixupFlags & MCFixupKindInfo::FKF_IsPCRel; + bool ShouldAlignPC = + FixupFlags & MCFixupKindInfo::FKF_IsAlignedDownTo32Bits; + if (IsPCRel) { + uint64_t Offset = getFragmentOffset(*DF) + Fixup.getOffset(); + + // A number of ARM fixups in Thumb mode require that the effective PC + // address be determined as the 32-bit aligned version of the actual + // offset. + if (ShouldAlignPC) + Offset &= ~0x3; + Value -= Offset; + + if (Add && !Sub && !Add->isUndefined() && !Add->isAbsolute()) { + IsResolved = (FixupFlags & MCFixupKindInfo::FKF_Constant) || + getWriter().isSymbolRefDifferenceFullyResolvedImpl( + *this, *Add, *DF, false, true); + } + } else { + IsResolved = Target.isAbsolute(); + assert(!ShouldAlignPC && "FKF_IsAlignedDownTo32Bits must be PC-relative"); + } } - Value = Target.getConstant(); - if (Add && Add->isDefined()) - Value += getSymbolOffset(*Add); - if (Sub && Sub->isDefined()) - Value -= getSymbolOffset(*Sub); - - bool ShouldAlignPC = FixupFlags & MCFixupKindInfo::FKF_IsAlignedDownTo32Bits; - assert((ShouldAlignPC ? IsPCRel : true) && - "FKF_IsAlignedDownTo32Bits is only allowed on PC-relative fixups!"); - - if (IsPCRel) { - uint64_t Offset = getFragmentOffset(*DF) + Fixup.getOffset(); - - // A number of ARM fixups in Thumb mode require that the effective PC - // address be determined as the 32-bit aligned version of the actual offset. 
- if (ShouldAlignPC) Offset &= ~0x3; - Value -= Offset; - } + if (!RecordReloc) + return IsResolved; // .reloc directive and the backend might force the relocation. // Backends that customize shouldForceRelocation generally just need the fixup @@ -200,12 +204,12 @@ bool MCAssembler::evaluateFixup(const MCFixup &Fixup, const MCFragment *DF, auto TargetVal = Target; TargetVal.Cst = Value; if (Fixup.getKind() >= FirstLiteralRelocationKind || - getBackend().shouldForceRelocation(*this, Fixup, TargetVal, STI)) { + getBackend().shouldForceRelocation(*this, Fixup, TargetVal, STI)) IsResolved = false; - WasForced = true; - } } - + if (!IsResolved) + getWriter().recordRelocation(const_cast<MCAssembler &>(*this), DF, Fixup, + Target, Value); return IsResolved; } @@ -844,24 +848,6 @@ void MCAssembler::writeSectionData(raw_ostream &OS, OS.tell() - Start == getSectionAddressSize(*Sec)); } -std::tuple<MCValue, uint64_t, bool> -MCAssembler::handleFixup(MCFragment &F, const MCFixup &Fixup, - const MCSubtargetInfo *STI) { - // Evaluate the fixup. - MCValue Target; - uint64_t FixedValue; - bool WasForced; - bool IsResolved = - evaluateFixup(Fixup, &F, Target, STI, FixedValue, WasForced); - if (!IsResolved) { - // The fixup was unresolved, we need a relocation. Inform the object - // writer of the relocation, and give it an opportunity to adjust the - // fixup value if need be. 
- getWriter().recordRelocation(*this, &F, Fixup, Target, FixedValue); - } - return std::make_tuple(Target, FixedValue, IsResolved); -} - void MCAssembler::layout() { assert(getBackendPtr() && "Expected assembler backend"); DEBUG_WITH_TYPE("mc-dump", { @@ -987,10 +973,9 @@ void MCAssembler::layout() { } for (const MCFixup &Fixup : Fixups) { uint64_t FixedValue; - bool IsResolved; MCValue Target; - std::tie(Target, FixedValue, IsResolved) = - handleFixup(Frag, Fixup, STI); + bool IsResolved = evaluateFixup(Fixup, &Frag, Target, STI, FixedValue, + /*RecordReloc=*/true); getBackend().applyFixup(*this, Fixup, Target, Contents, FixedValue, IsResolved, STI); } @@ -1012,11 +997,10 @@ bool MCAssembler::fixupNeedsRelaxation(const MCFixup &Fixup, assert(getBackendPtr() && "Expected assembler backend"); MCValue Target; uint64_t Value; - bool WasForced; bool Resolved = evaluateFixup(Fixup, DF, Target, DF->getSubtargetInfo(), - Value, WasForced); - return getBackend().fixupNeedsRelaxationAdvanced(*this, Fixup, Resolved, - Value, DF, WasForced); + Value, /*RecordReloc=*/false); + return getBackend().fixupNeedsRelaxationAdvanced(*this, Fixup, Target, Value, + Resolved); } bool MCAssembler::fragmentNeedsRelaxation(const MCRelaxableFragment *F) const { diff --git a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp index 5a785d7afd1e..b0ec215aec20 100644 --- a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp +++ b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp @@ -857,8 +857,7 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig, "cannot change section address in a non-relocatable file"); StringMap<AddressUpdate> SectionsToUpdateAddress; for (const SectionPatternAddressUpdate &PatternUpdate : - make_range(Config.ChangeSectionAddress.rbegin(), - Config.ChangeSectionAddress.rend())) { + reverse(Config.ChangeSectionAddress)) { for (SectionBase &Sec : Obj.sections()) { if (PatternUpdate.SectionPattern.matches(Sec.Name) && 
SectionsToUpdateAddress.try_emplace(Sec.Name, PatternUpdate.Update) diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index b923181e9726..215355827337 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -82,6 +82,7 @@ #include "llvm/CodeGen/AtomicExpand.h" #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/BranchFoldingPass.h" +#include "llvm/CodeGen/BranchRelaxation.h" #include "llvm/CodeGen/CallBrPrepare.h" #include "llvm/CodeGen/CodeGenPrepare.h" #include "llvm/CodeGen/ComplexDeinterleavingPass.h" @@ -148,6 +149,7 @@ #include "llvm/CodeGen/RegUsageInfoPropagate.h" #include "llvm/CodeGen/RegisterCoalescerPass.h" #include "llvm/CodeGen/RegisterUsageInfo.h" +#include "llvm/CodeGen/RemoveLoadsIntoFakeUses.h" #include "llvm/CodeGen/RemoveRedundantDebugValues.h" #include "llvm/CodeGen/RenameIndependentSubregs.h" #include "llvm/CodeGen/SafeStack.h" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 300898bb092b..f172271be09a 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -926,14 +926,14 @@ PassBuilder::buildInlinerPipeline(OptimizationLevel Level, IP = getInlineParamsFromOptLevel(Level); else IP = getInlineParams(PTO.InlinerThreshold); - // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to - // disable hot callsite inline (as much as possible [1]) because it makes + // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO, + // set hot-caller threshold to 0 to disable hot + // callsite inline (as much as possible [1]) because it makes // profile annotation in the backend inaccurate. // // [1] Note the cost of a function could be below zero due to erased // prologue / epilogue. 
- if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt && - PGOOpt->Action == PGOOptions::SampleUse) + if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) IP.HotCallSiteThreshold = 0; if (PGOOpt) @@ -1023,14 +1023,14 @@ PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level, ModulePassManager MPM; InlineParams IP = getInlineParamsFromOptLevel(Level); - // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to - // disable hot callsite inline (as much as possible [1]) because it makes + // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO, + // set hot-caller threshold to 0 to disable hot + // callsite inline (as much as possible [1]) because it makes // profile annotation in the backend inaccurate. // // [1] Note the cost of a function could be below zero due to erased // prologue / epilogue. - if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt && - PGOOpt->Action == PGOOptions::SampleUse) + if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) IP.HotCallSiteThreshold = 0; if (PGOOpt) diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index 4c59278eaaa0..dc1dd5d9c7f4 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -2001,14 +2001,12 @@ DotCfgDiff::DotCfgDiff(StringRef Title, const FuncDataT<DCData> &Before, for (auto &A : After.getData()) { StringRef Label = A.getKey(); const BlockDataT<DCData> &BD = A.getValue(); - unsigned C = NodePosition.count(Label); - if (C == 0) + auto It = NodePosition.find(Label); + if (It == NodePosition.end()) // This only exists in the after IR. Create the node. createNode(Label, BD, AfterColour); - else { - assert(C == 1 && "Unexpected multiple nodes."); - Nodes[NodePosition[Label]].setCommon(BD); - } + else + Nodes[It->second].setCommon(BD); // Add in the edges between the nodes (as common or only in after). 
for (StringMap<std::string>::const_iterator Sink = BD.getData().begin(), E = BD.getData().end(); diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp index e058010f9d26..a7b9f259bfed 100644 --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -125,6 +125,9 @@ struct fltSemantics { /* Whether this semantics can represent signed values */ bool hasSignedRepr = true; + + /* Whether the sign bit of this semantics is the most significant bit */ + bool hasSignBitInMSB = true; }; static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16}; @@ -144,9 +147,15 @@ static constexpr fltSemantics semFloat8E4M3B11FNUZ = { 4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; static constexpr fltSemantics semFloat8E3M4 = {3, -2, 5, 8}; static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19}; -static constexpr fltSemantics semFloat8E8M0FNU = { - 127, -127, 1, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes, - false, false}; +static constexpr fltSemantics semFloat8E8M0FNU = {127, + -127, + 1, + 8, + fltNonfiniteBehavior::NanOnly, + fltNanEncoding::AllOnes, + false, + false, + false}; static constexpr fltSemantics semFloat6E3M2FN = { 4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly}; @@ -358,6 +367,10 @@ bool APFloatBase::isIEEELikeFP(const fltSemantics &semantics) { return SemanticsToEnum(semantics) <= S_IEEEquad; } +bool APFloatBase::hasSignBitInMSB(const fltSemantics &semantics) { + return semantics.hasSignBitInMSB; +} + bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src, const fltSemantics &Dst) { // Exponent range must be larger. 
diff --git a/llvm/lib/TableGen/CMakeLists.txt b/llvm/lib/TableGen/CMakeLists.txt index 84815c773699..0f9284c8bb99 100644 --- a/llvm/lib/TableGen/CMakeLists.txt +++ b/llvm/lib/TableGen/CMakeLists.txt @@ -7,6 +7,7 @@ add_llvm_component_library(LLVMTableGen Record.cpp SetTheory.cpp StringMatcher.cpp + StringToOffsetTable.cpp TableGenBackend.cpp TableGenBackendSkeleton.cpp TGLexer.cpp diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp index 35600bf2f1f8..ea716215e067 100644 --- a/llvm/lib/TableGen/Main.cpp +++ b/llvm/lib/TableGen/Main.cpp @@ -64,6 +64,15 @@ WriteIfChanged("write-if-changed", cl::desc("Only write output if it changed")); static cl::opt<bool> TimePhases("time-phases", cl::desc("Time phases of parser and backend")); +namespace llvm { +cl::opt<bool> EmitLongStrLiterals( + "long-string-literals", + cl::desc("when emitting large string tables, prefer string literals over " + "comma-separated char literals. This can be a readability and " + "compile-time performance win, but upsets some compilers"), + cl::Hidden, cl::init(true)); +} // end namespace llvm + static cl::opt<bool> NoWarnOnUnusedTemplateArgs( "no-warn-on-unused-template-args", cl::desc("Disable unused template argument warnings.")); diff --git a/llvm/lib/TableGen/StringToOffsetTable.cpp b/llvm/lib/TableGen/StringToOffsetTable.cpp new file mode 100644 index 000000000000..d73b5749ad7d --- /dev/null +++ b/llvm/lib/TableGen/StringToOffsetTable.cpp @@ -0,0 +1,120 @@ +//===- StringToOffsetTable.cpp - Emit a big concatenated string -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/TableGen/StringToOffsetTable.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/Main.h" + +using namespace llvm; + +unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str, + bool appendZero) { + auto [II, Inserted] = StringOffset.insert({Str, size()}); + if (Inserted) { + // Add the string to the aggregate if this is the first time found. + AggregateString.append(Str.begin(), Str.end()); + if (appendZero) + AggregateString += '\0'; + } + + return II->second; +} + +void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name, + const Twine &Indent) const { + OS << formatv(R"( +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Woverlength-strings" +#endif +{0}static constexpr char {1}Storage[] = )", + Indent, Name); + + // MSVC silently miscompiles string literals longer than 64k in some + // circumstances. The build system sets EmitLongStrLiterals to false when it + // detects that it is targetting MSVC. When that option is false and the + // string table is longer than 64k, emit it as an array of character + // literals. + bool UseChars = !EmitLongStrLiterals && AggregateString.size() > (64 * 1024); + OS << (UseChars ? "{\n" : "\n"); + + llvm::ListSeparator LineSep(UseChars ? ",\n" : "\n"); + llvm::SmallVector<StringRef> Strings(split(AggregateString, '\0')); + // We should always have an empty string at the start, and because these are + // null terminators rather than separators, we'll have one at the end as + // well. Skip the end one. 
+ assert(Strings.front().empty() && "Expected empty initial string!"); + assert(Strings.back().empty() && + "Expected empty string at the end due to terminators!"); + Strings.pop_back(); + for (StringRef Str : Strings) { + OS << LineSep << Indent << " "; + // If we can, just emit this as a string literal to be concatenated. + if (!UseChars) { + OS << "\""; + OS.write_escaped(Str); + OS << "\\0\""; + continue; + } + + llvm::ListSeparator CharSep(", "); + for (char C : Str) { + OS << CharSep << "'"; + OS.write_escaped(StringRef(&C, 1)); + OS << "'"; + } + OS << CharSep << "'\\0'"; + } + OS << LineSep << Indent << (UseChars ? "};" : " ;"); + + OS << formatv(R"( +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +{0}static constexpr llvm::StringTable {1} = +{0} {1}Storage; +)", + Indent, Name); +} + +void StringToOffsetTable::EmitString(raw_ostream &O) const { + // Escape the string. + SmallString<256> EscapedStr; + raw_svector_ostream(EscapedStr).write_escaped(AggregateString); + + O << " \""; + unsigned CharsPrinted = 0; + for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) { + if (CharsPrinted > 70) { + O << "\"\n \""; + CharsPrinted = 0; + } + O << EscapedStr[i]; + ++CharsPrinted; + + // Print escape sequences all together. 
+ if (EscapedStr[i] != '\\') + continue; + + assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!"); + if (isDigit(EscapedStr[i + 1])) { + assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) && + "Expected 3 digit octal escape!"); + O << EscapedStr[++i]; + O << EscapedStr[++i]; + O << EscapedStr[++i]; + CharsPrinted += 3; + } else { + O << EscapedStr[++i]; + ++CharsPrinted; + } + } + O << "\""; +} diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 6bf6ce716783..68218e59961c 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -3980,7 +3980,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots( } if (!InsertBeforeLR) - CSI.insert(CSI.end(), VGSaves.begin(), VGSaves.end()); + llvm::append_range(CSI, VGSaves); } Register LastReg = 0; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index d370f8c7ff6e..74217fad82a7 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2760,6 +2760,9 @@ bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) { case AArch64::LDRXpre: case AArch64::LDURSWi: case AArch64::LDRSWpre: + // SVE instructions. + case AArch64::LDR_ZXI: + case AArch64::STR_ZXI: return true; } } @@ -2912,6 +2915,18 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const { return false; } + // Pairing SVE fills/spills is only valid for little-endian targets that + // implement VLS 128. + switch (MI.getOpcode()) { + default: + break; + case AArch64::LDR_ZXI: + case AArch64::STR_ZXI: + if (!Subtarget.isLittleEndian() || + Subtarget.getSVEVectorSizeInBits() != 128) + return false; + } + // Check if this load/store has a hint to avoid pair formation. // MachineMemOperands hints are set by the AArch64StorePairSuppress pass. 
if (isLdStPairSuppressed(MI)) diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 06e633effe87..7c47492cf1a8 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -298,6 +298,7 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc, case AArch64::STRXui: case AArch64::STRXpre: case AArch64::STURXi: + case AArch64::STR_ZXI: case AArch64::LDRDui: case AArch64::LDURDi: case AArch64::LDRDpre: @@ -316,6 +317,7 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc, case AArch64::LDRSui: case AArch64::LDURSi: case AArch64::LDRSpre: + case AArch64::LDR_ZXI: return Opc; case AArch64::LDRSWui: return AArch64::LDRWui; @@ -361,6 +363,7 @@ static unsigned getMatchingPairOpcode(unsigned Opc) { return AArch64::STPDpre; case AArch64::STRQui: case AArch64::STURQi: + case AArch64::STR_ZXI: return AArch64::STPQi; case AArch64::STRQpre: return AArch64::STPQpre; @@ -386,6 +389,7 @@ static unsigned getMatchingPairOpcode(unsigned Opc) { return AArch64::LDPDpre; case AArch64::LDRQui: case AArch64::LDURQi: + case AArch64::LDR_ZXI: return AArch64::LDPQi; case AArch64::LDRQpre: return AArch64::LDPQpre; @@ -1225,6 +1229,16 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, (void)MIBSXTW; LLVM_DEBUG(dbgs() << " Extend operand:\n "); LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs())); + } else if (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) { + // We are combining SVE fill/spill to LDP/STP, so we need to use the Q + // variant of the registers. 
+ MachineOperand &MOp0 = MIB->getOperand(0); + MachineOperand &MOp1 = MIB->getOperand(1); + assert(AArch64::ZPRRegClass.contains(MOp0.getReg()) && + AArch64::ZPRRegClass.contains(MOp1.getReg()) && "Invalid register."); + MOp0.setReg(AArch64::Q0 + (MOp0.getReg() - AArch64::Z0)); + MOp1.setReg(AArch64::Q0 + (MOp1.getReg() - AArch64::Z0)); + LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs())); } else { LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs())); } @@ -1499,6 +1513,12 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, if (OpcA == OpcB) return !AArch64InstrInfo::isPreLdSt(FirstMI); + // Bail out if one of the opcodes is SVE fill/spill, as we currently don't + // allow pairing them with other instructions. + if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI || + OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI) + return false; + // Two pre ld/st of different opcodes cannot be merged either if (AArch64InstrInfo::isPreLdSt(FirstMI) && AArch64InstrInfo::isPreLdSt(MI)) return false; @@ -2659,7 +2679,8 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) { // Get the needed alignments to check them if // ldp-aligned-only/stp-aligned-only features are opted. uint64_t MemAlignment = MemOp->getAlign().value(); - uint64_t TypeAlignment = Align(MemOp->getSize().getValue()).value(); + uint64_t TypeAlignment = + Align(MemOp->getSize().getValue().getKnownMinValue()).value(); if (MemAlignment < 2 * TypeAlignment) { NumFailedAlignmentCheck++; @@ -2820,11 +2841,18 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, } // 3) Find loads and stores that can be merged into a single load or store // pair instruction. + // When compiling for SVE 128, also try to combine SVE fill/spill + // instructions into LDP/STP. 
// e.g., // ldr x0, [x2] // ldr x1, [x2, #8] // ; becomes // ldp x0, x1, [x2] + // e.g., + // ldr z0, [x2] + // ldr z1, [x2, #1, mul vl] + // ; becomes + // ldp q0, q1, [x2] if (MBB.getParent()->getRegInfo().tracksLiveness()) { DefinedInBB.clear(); diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp index 54347b610c50..0ddd17cee134 100644 --- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp +++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp @@ -261,15 +261,18 @@ bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) { // A COPY from an FPR will become a FMOVSWr, so do so now so that we know // that the upper bits are zero. if (RC != &AArch64::FPR32RegClass && - ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass) || + ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass && + RC != &AArch64::ZPRRegClass) || SrcMI->getOperand(1).getSubReg() != AArch64::ssub)) return false; - Register CpySrc = SrcMI->getOperand(1).getReg(); + Register CpySrc; if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) { CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass); BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(), TII->get(TargetOpcode::COPY), CpySrc) .add(SrcMI->getOperand(1)); + } else { + CpySrc = SrcMI->getOperand(1).getReg(); } BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(), TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg()) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index ca1a48690195..2b9d32f9208f 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1492,9 +1492,8 @@ static bool isAllActivePredicate(Value *Pred) { if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <= cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements()) Pred = UncastedPred; - - return 
match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>( - m_ConstantInt<AArch64SVEPredPattern::all>())); + auto *C = dyn_cast<Constant>(Pred); + return (C && C->isAllOnesValue()); } // Use SVE intrinsic info to eliminate redundant operands and/or canonicalise @@ -1701,14 +1700,7 @@ static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC, IntrinsicInst &II) { LLVMContext &Ctx = II.getContext(); - // Check that the predicate is all active - auto *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(0)); - if (!Pg || Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue) - return std::nullopt; - - const auto PTruePattern = - cast<ConstantInt>(Pg->getOperand(0))->getZExtValue(); - if (PTruePattern != AArch64SVEPredPattern::all) + if (!isAllActivePredicate(II.getArgOperand(0))) return std::nullopt; // Check that we have a compare of zero.. @@ -2118,8 +2110,7 @@ instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II) { auto *OpPredicate = II.getOperand(0); auto BinOpCode = intrinsicIDToBinOpCode(II.getIntrinsicID()); if (BinOpCode == Instruction::BinaryOpsEnd || - !match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>( - m_ConstantInt<AArch64SVEPredPattern::all>()))) + !isAllActivePredicate(OpPredicate)) return std::nullopt; auto BinOp = IC.Builder.CreateBinOpFMF( BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags()); @@ -2641,6 +2632,13 @@ static std::optional<Instruction *> instCombineDMB(InstCombiner &IC, return std::nullopt; } +static std::optional<Instruction *> instCombinePTrue(InstCombiner &IC, + IntrinsicInst &II) { + if (match(II.getOperand(0), m_ConstantInt<AArch64SVEPredPattern::all>())) + return IC.replaceInstUsesWith(II, Constant::getAllOnesValue(II.getType())); + return std::nullopt; +} + std::optional<Instruction *> AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { @@ -2744,6 +2742,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, return instCombineSVEDupqLane(IC, II); case 
Intrinsic::aarch64_sve_insr: return instCombineSVEInsr(IC, II); + case Intrinsic::aarch64_sve_ptrue: + return instCombinePTrue(IC, II); } return std::nullopt; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp index 7b4d00c8214c..153b14ce6050 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp @@ -1491,12 +1491,10 @@ bool MFMAExpInterleaveOpt::analyzeDAG(const SIInstrInfo *TII) { return isBitPack(Opc); }); - auto *PackPred = - std::find_if((*TempMFMA)->Preds.begin(), (*TempMFMA)->Preds.end(), - [&isBitPack](SDep &Pred) { - auto Opc = Pred.getSUnit()->getInstr()->getOpcode(); - return isBitPack(Opc); - }); + auto *PackPred = llvm::find_if((*TempMFMA)->Preds, [&isBitPack](SDep &Pred) { + auto Opc = Pred.getSUnit()->getInstr()->getOpcode(); + return isBitPack(Opc); + }); if (PackPred == (*TempMFMA)->Preds.end()) return false; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 94ecb6ba9a2b..6c01f6dd370f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -729,6 +729,11 @@ static bool isSupportedAccessType(FixedVectorType *VecTy, Type *AccessTy, // complicated. if (isa<FixedVectorType>(AccessTy)) { TypeSize AccTS = DL.getTypeStoreSize(AccessTy); + // If the type size and the store size don't match, we would need to do more + // than just bitcast to translate between an extracted/insertable subvectors + // and the accessed value. 
+ if (AccTS * 8 != DL.getTypeSizeInBits(AccessTy)) + return false; TypeSize VecTS = DL.getTypeStoreSize(VecTy->getElementType()); return AccTS.isKnownMultipleOf(VecTS); } @@ -813,15 +818,17 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) { if (VectorType::isValidElementType(ElemTy) && NumElems > 0) { unsigned ElementSize = DL->getTypeSizeInBits(ElemTy) / 8; - unsigned AllocaSize = DL->getTypeStoreSize(AllocaTy); - // Expand vector if required to match padding of inner type, - // i.e. odd size subvectors. - // Storage size of new vector must match that of alloca for correct - // behaviour of byte offsets and GEP computation. - if (NumElems * ElementSize != AllocaSize) - NumElems = AllocaSize / ElementSize; - if (NumElems > 0 && (AllocaSize % ElementSize) == 0) - VectorTy = FixedVectorType::get(ElemTy, NumElems); + if (ElementSize > 0) { + unsigned AllocaSize = DL->getTypeStoreSize(AllocaTy); + // Expand vector if required to match padding of inner type, + // i.e. odd size subvectors. + // Storage size of new vector must match that of alloca for correct + // behaviour of byte offsets and GEP computation. 
+ if (NumElems * ElementSize != AllocaSize) + NumElems = AllocaSize / ElementSize; + if (NumElems > 0 && (AllocaSize % ElementSize) == 0) + VectorTy = FixedVectorType::get(ElemTy, NumElems); + } } } @@ -861,7 +868,14 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) { LLVM_DEBUG(dbgs() << " Attempting promotion to: " << *VectorTy << "\n"); Type *VecEltTy = VectorTy->getElementType(); - unsigned ElementSize = DL->getTypeSizeInBits(VecEltTy) / 8; + unsigned ElementSizeInBits = DL->getTypeSizeInBits(VecEltTy); + if (ElementSizeInBits != DL->getTypeAllocSizeInBits(VecEltTy)) { + LLVM_DEBUG(dbgs() << " Cannot convert to vector if the allocation size " + "does not match the type's size\n"); + return false; + } + unsigned ElementSize = ElementSizeInBits / 8; + assert(ElementSize > 0); for (auto *U : Uses) { Instruction *Inst = cast<Instruction>(U->getUser()); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp index 43885587ad81..ca093be61d11 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp @@ -1081,8 +1081,7 @@ void AMDGPUSwLowerLDS::lowerNonKernelLDSAccesses( IRB.CreateLoad(IRB.getPtrTy(AMDGPUAS::GLOBAL_ADDRESS), BaseLoad); for (GlobalVariable *GV : LDSGlobals) { - const auto *GVIt = - std::find(OrdereLDSGlobals.begin(), OrdereLDSGlobals.end(), GV); + const auto *GVIt = llvm::find(OrdereLDSGlobals, GV); assert(GVIt != OrdereLDSGlobals.end()); uint32_t GVOffset = std::distance(OrdereLDSGlobals.begin(), GVIt); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 8f3138acaea0..b59e94085272 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -66,6 +66,7 @@ #include "llvm/Analysis/KernelInfo.h" #include "llvm/Analysis/UniformityAnalysis.h" #include "llvm/CodeGen/AtomicExpand.h" +#include 
"llvm/CodeGen/BranchRelaxation.h" #include "llvm/CodeGen/DeadMachineInstructionElim.h" #include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" @@ -2205,7 +2206,7 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const { addPass(AMDGPUInsertDelayAluPass()); } - // TODO: addPass(BranchRelaxationPass()); + addPass(BranchRelaxationPass()); } bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 356040da9567..bd95bcd89e18 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4055,8 +4055,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, } if (IsChainCallConv) - Ops.insert(Ops.end(), ChainCallSpecialArgs.begin(), - ChainCallSpecialArgs.end()); + llvm::append_range(Ops, ChainCallSpecialArgs); // Add argument registers to the end of the list so that they are known live // into the call. @@ -15526,9 +15525,9 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node, // Adjust the writemask in the node SmallVector<SDValue, 12> Ops; - Ops.insert(Ops.end(), Node->op_begin(), Node->op_begin() + DmaskIdx); + llvm::append_range(Ops, Node->ops().take_front(DmaskIdx)); Ops.push_back(DAG.getTargetConstant(NewDmask, SDLoc(Node), MVT::i32)); - Ops.insert(Ops.end(), Node->op_begin() + DmaskIdx + 1, Node->op_end()); + llvm::append_range(Ops, Node->ops().drop_front(DmaskIdx + 1)); MVT SVT = Node->getValueType(0).getVectorElementType().getSimpleVT(); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index efdf642e29db..1673bfa15267 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -352,7 +352,7 @@ void SIMachineFunctionInfo::shiftWwmVGPRsToLowestRange( // Replace the register in SpillPhysVGPRs. 
This is needed to look for free // lanes while spilling special SGPRs like FP, BP, etc. during PEI. - auto *RegItr = std::find(SpillPhysVGPRs.begin(), SpillPhysVGPRs.end(), Reg); + auto *RegItr = llvm::find(SpillPhysVGPRs, Reg); if (RegItr != SpillPhysVGPRs.end()) { unsigned Idx = std::distance(SpillPhysVGPRs.begin(), RegItr); SpillPhysVGPRs[Idx] = NewReg; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 67b44d2a19ef..0611a97a3cdc 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -624,7 +624,9 @@ bool isMAC(unsigned Opc) { Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || + Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 || Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 || + Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 || Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 || Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi || Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi || diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 2d45c42e724d..f8790dd063ae 100644 --- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -2849,12 +2849,10 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { // Erase the entry into the DbgValueSinkCandidates for the DBG_VALUE // that was moved. auto DbgVar = createDebugVariableFromMachineInstr(DbgInstr); - auto DbgIt = DbgValueSinkCandidates.find(DbgVar); - // If the instruction is a DBG_VALUE_LIST, it may have already been - // erased from the DbgValueSinkCandidates. Only erase if it exists in - // the DbgValueSinkCandidates. - if (DbgIt != DbgValueSinkCandidates.end()) - DbgValueSinkCandidates.erase(DbgIt); + // Erase DbgVar from DbgValueSinkCandidates if still present. 
If the + // instruction is a DBG_VALUE_LIST, it may have already been erased from + // DbgValueSinkCandidates. + DbgValueSinkCandidates.erase(DbgVar); // Zero out original dbg instr forEachDbgRegOperand(DbgInstr, [&](MachineOperand &Op) { Op.setReg(0); }); diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index bed15bdc274b..88bcf4cc9c6c 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -335,8 +335,36 @@ const char *ARMAsmBackend::reasonForFixupRelaxation(const MCFixup &Fixup, return nullptr; } -bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, - uint64_t Value) const { +static bool needsInterworking(const MCAssembler &Asm, const MCSymbol *Sym, + unsigned FixupKind) { + // Create relocations for unconditional branches to function symbols with + // different execution mode in ELF binaries. + if (!Sym || !Sym->isELF()) + return false; + unsigned Type = cast<MCSymbolELF>(Sym)->getType(); + if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC)) { + if (Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_uncondbranch)) + return true; + if (!Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_thumb_br || + FixupKind == ARM::fixup_arm_thumb_bl || + FixupKind == ARM::fixup_t2_condbranch || + FixupKind == ARM::fixup_t2_uncondbranch)) + return true; + } + return false; +} + +bool ARMAsmBackend::fixupNeedsRelaxationAdvanced(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target, + uint64_t Value, + bool Resolved) const { + const MCSymbol *Sym = Target.getAddSym(); + if (needsInterworking(Asm, Sym, Fixup.getTargetKind())) + return true; + + if (!Resolved) + return true; return reasonForFixupRelaxation(Fixup, Value); } @@ -973,18 +1001,8 @@ bool ARMAsmBackend::shouldForceRelocation(const MCAssembler &Asm, } // Create relocations for unconditional branches to function symbols with // different execution 
mode in ELF binaries. - if (Sym && Sym->isELF()) { - unsigned Type = cast<MCSymbolELF>(Sym)->getType(); - if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC)) { - if (Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_uncondbranch)) - return true; - if (!Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_thumb_br || - FixupKind == ARM::fixup_arm_thumb_bl || - FixupKind == ARM::fixup_t2_condbranch || - FixupKind == ARM::fixup_t2_uncondbranch)) - return true; - } - } + if (needsInterworking(Asm, Sym, Fixup.getTargetKind())) + return true; // We must always generate a relocation for BL/BLX instructions if we have // a symbol to reference, as the linker relies on knowing the destination // symbol's thumb-ness to get interworking right. diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h index 4e4df16d890c..57588d989d54 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -54,8 +54,9 @@ public: const char *reasonForFixupRelaxation(const MCFixup &Fixup, uint64_t Value) const; - bool fixupNeedsRelaxation(const MCFixup &Fixup, - uint64_t Value) const override; + bool fixupNeedsRelaxationAdvanced(const MCAssembler &, + const MCFixup &, const MCValue &, uint64_t, + bool) const override; void relaxInstruction(MCInst &Inst, const MCSubtargetInfo &STI) const override; diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h index ec11a78f8a7a..9d35626f449d 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h @@ -14,6 +14,7 @@ #ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H #define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include <cstddef> #include <cstdint> @@ -60,7 +61,7 @@ public: /// Emit unwind raw opcodes void EmitRaw(const 
SmallVectorImpl<uint8_t> &Opcodes) { - Ops.insert(Ops.end(), Opcodes.begin(), Opcodes.end()); + llvm::append_range(Ops, Opcodes); OpBegins.push_back(OpBegins.back() + Opcodes.size()); } diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp index ea7968f01ee4..792a55555a0d 100644 --- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp @@ -171,16 +171,13 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, } } -bool CSKYAsmBackend::fixupNeedsRelaxationAdvanced(const MCAssembler &Asm, +bool CSKYAsmBackend::fixupNeedsRelaxationAdvanced(const MCAssembler &, const MCFixup &Fixup, - bool Resolved, uint64_t Value, - const MCRelaxableFragment *DF, - const bool WasForced) const { - // Return true if the symbol is actually unresolved. - // Resolved could be always false when shouldForceRelocation return true. - // We use !WasForced to indicate that the symbol is unresolved and not forced - // by shouldForceRelocation. - if (!Resolved && !WasForced) + const MCValue &, + uint64_t Value, + bool Resolved) const { + // Return true if the symbol is unresolved. 
+ if (!Resolved) return true; int64_t Offset = int64_t(Value); diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h index 07c5065ea4b5..3ce2f37212dc 100644 --- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h @@ -39,11 +39,9 @@ public: bool mayNeedRelaxation(const MCInst &Inst, const MCSubtargetInfo &STI) const override; - bool fixupNeedsRelaxationAdvanced(const MCAssembler &Asm, - const MCFixup &Fixup, bool Resolved, - uint64_t Value, - const MCRelaxableFragment *DF, - const bool WasForced) const override; + bool fixupNeedsRelaxationAdvanced(const MCAssembler &, + const MCFixup &, const MCValue &, uint64_t, + bool) const override; bool writeNopData(raw_ostream &OS, uint64_t Count, const MCSubtargetInfo *STI) const override; diff --git a/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp b/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp index 0c31e274a4ee..71bdfc6657c5 100644 --- a/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp @@ -160,11 +160,8 @@ bool HexagonTfrCleanup::updateImmMap(MachineInstr *MI, ImmediateMap &IMap) { if (!Mo->isReg() || !Mo->isDef()) continue; unsigned R = Mo->getReg(); - for (MCRegAliasIterator AR(R, TRI, true); AR.isValid(); ++AR) { - ImmediateMap::iterator F = IMap.find(*AR); - if (F != IMap.end()) - IMap.erase(F); - } + for (MCRegAliasIterator AR(R, TRI, true); AR.isValid(); ++AR) + IMap.erase(*AR); } return true; } diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 1c1454a41cc0..ae1dac57d929 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -41,6 +41,7 @@ class HexagonAsmBackend : public MCAsmBackend { uint8_t OSABI; StringRef CPU; mutable uint64_t relaxedCnt; + mutable const MCInst *RelaxedMCB = 
nullptr; std::unique_ptr <MCInstrInfo> MCII; std::unique_ptr <MCInst *> RelaxTarget; MCInst * Extender; @@ -560,17 +561,17 @@ public: /// \param Inst - The instruction to test. bool mayNeedRelaxation(MCInst const &Inst, const MCSubtargetInfo &STI) const override { + RelaxedMCB = &Inst; return true; } /// fixupNeedsRelaxation - Target specific predicate for whether a given /// fixup requires the associated instruction to be relaxed. bool fixupNeedsRelaxationAdvanced(const MCAssembler &Asm, - const MCFixup &Fixup, bool Resolved, + const MCFixup &Fixup, const MCValue &, uint64_t Value, - const MCRelaxableFragment *DF, - const bool WasForced) const override { - MCInst const &MCB = DF->getInst(); + bool Resolved) const override { + MCInst const &MCB = *RelaxedMCB; assert(HexagonMCInstrInfo::isBundle(MCB)); *RelaxTarget = nullptr; diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 5f7974ab6cae..0c295997ab52 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -602,14 +602,6 @@ bool MipsAsmBackend::shouldForceRelocation(const MCAssembler &Asm, } } -bool MipsAsmBackend::isMicroMips(const MCSymbol *Sym) const { - if (const auto *ElfSym = dyn_cast<const MCSymbolELF>(Sym)) { - if (ElfSym->getOther() & ELF::STO_MIPS_MICROMIPS) - return true; - } - return false; -} - namespace { class WindowsMipsAsmBackend : public MipsAsmBackend { diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index 9752615422b6..1e8504aaf2aa 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -54,8 +54,6 @@ public: bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, const MCSubtargetInfo *STI) override; - - bool isMicroMips(const MCSymbol *Sym) const override; }; // class 
MipsAsmBackend } // namespace diff --git a/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h index d9beab7ec42e..8feae341893a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h @@ -50,8 +50,7 @@ public: /// Check if the symbol has a mapping. Having a mapping means the handle is /// replaced with a reference bool checkImageHandleSymbol(StringRef Symbol) const { - return ImageHandleList.end() != - std::find(ImageHandleList.begin(), ImageHandleList.end(), Symbol); + return llvm::is_contained(ImageHandleList, Symbol); } }; } diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 55f1a90b2a01..f246aee4da39 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -539,17 +539,16 @@ public: // True if operand is a symbol with no modifiers, or a constant with no // modifiers and isShiftedInt<N-1, 1>(Op). 
template <int N> bool isBareSimmNLsb0() const { - int64_t Imm; - RISCVMCExpr::Specifier VK = RISCVMCExpr::VK_None; if (!isImm()) return false; - bool IsConstantImm = evaluateConstantImm(getImm(), Imm); - bool IsValid; - if (!IsConstantImm) - IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK); - else - IsValid = isShiftedInt<N - 1, 1>(fixImmediateForRV32(Imm, isRV64Imm())); - return IsValid && VK == RISCVMCExpr::VK_None; + + int64_t Imm; + if (evaluateConstantImm(getImm(), Imm)) + return isShiftedInt<N - 1, 1>(fixImmediateForRV32(Imm, isRV64Imm())); + + RISCVMCExpr::Specifier VK = RISCVMCExpr::VK_None; + return RISCVAsmParser::classifySymbolRef(getImm(), VK) && + VK == RISCVMCExpr::VK_None; } // True if operand is a symbol with no modifiers, or a constant with no @@ -2079,9 +2078,6 @@ ParseStatus RISCVAsmParser::parseBareSymbol(OperandVector &Operands) { SMLoc E = SMLoc::getFromPointer(S.getPointer() + Identifier.size()); - if (Identifier.consume_back("@plt")) - return Error(getLoc(), "'@plt' operand not valid for instruction"); - MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); if (Sym->isVariable()) { @@ -2129,8 +2125,9 @@ ParseStatus RISCVAsmParser::parseCallSymbol(OperandVector &Operands) { Lex(); Lex(); StringRef PLT; + SMLoc Loc = getLoc(); if (getParser().parseIdentifier(PLT) || PLT != "plt") - return ParseStatus::Failure; + return Error(Loc, "@ (except the deprecated/ignored @plt) is disallowed"); } else if (!getLexer().peekTok().is(AsmToken::EndOfStatement)) { // Avoid parsing the register in `call rd, foo` as a call symbol. 
return ParseStatus::NoMatch; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index 49c8c6957aa3..b36b8bd3fb43 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -141,20 +141,19 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm, return STI->hasFeature(RISCV::FeatureRelax) || ForceRelocs; } -bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced( - const MCAssembler &Asm, const MCFixup &Fixup, bool Resolved, uint64_t Value, - const MCRelaxableFragment *DF, const bool WasForced) const { +bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCAssembler &, + const MCFixup &Fixup, + const MCValue &, + uint64_t Value, + bool Resolved) const { if (!RelaxBranches) return false; int64_t Offset = int64_t(Value); unsigned Kind = Fixup.getTargetKind(); - // Return true if the symbol is actually unresolved. - // Resolved could be always false when shouldForceRelocation return true. - // We use !WasForced to indicate that the symbol is unresolved and not forced - // by shouldForceRelocation. - if (!Resolved && !WasForced) + // Return true if the symbol is unresolved. 
+ if (!Resolved) return true; switch (Kind) { @@ -594,12 +593,9 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, } } -bool RISCVAsmBackend::evaluateTargetFixup(const MCAssembler &Asm, - const MCFixup &Fixup, - const MCFragment *DF, - const MCValue &Target, - const MCSubtargetInfo *STI, - uint64_t &Value, bool &WasForced) { +bool RISCVAsmBackend::evaluateTargetFixup( + const MCAssembler &Asm, const MCFixup &Fixup, const MCFragment *DF, + const MCValue &Target, const MCSubtargetInfo *STI, uint64_t &Value) { const MCFixup *AUIPCFixup; const MCFragment *AUIPCDF; MCValue AUIPCTarget; @@ -646,12 +642,7 @@ bool RISCVAsmBackend::evaluateTargetFixup(const MCAssembler &Asm, Value = Asm.getSymbolOffset(SA) + AUIPCTarget.getConstant(); Value -= Asm.getFragmentOffset(*AUIPCDF) + AUIPCFixup->getOffset(); - if (shouldForceRelocation(Asm, *AUIPCFixup, AUIPCTarget, STI)) { - WasForced = true; - return false; - } - - return true; + return !shouldForceRelocation(Asm, *AUIPCFixup, AUIPCTarget, STI); } bool RISCVAsmBackend::handleAddSubRelocations(const MCAssembler &Asm, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h index f5e8d340d9bc..5d585b4efc11 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h @@ -49,8 +49,8 @@ public: bool evaluateTargetFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCFragment *DF, const MCValue &Target, - const MCSubtargetInfo *STI, uint64_t &Value, - bool &WasForced) override; + const MCSubtargetInfo *STI, + uint64_t &Value) override; bool handleAddSubRelocations(const MCAssembler &Asm, const MCFragment &F, const MCFixup &Fixup, const MCValue &Target, @@ -68,12 +68,9 @@ public: const MCValue &Target, const MCSubtargetInfo *STI) override; - bool fixupNeedsRelaxationAdvanced(const MCAssembler &Asm, - const MCFixup &Fixup, bool Resolved, - uint64_t Value, - const MCRelaxableFragment 
*DF, - const bool WasForced) const override; - + bool fixupNeedsRelaxationAdvanced(const MCAssembler &, + const MCFixup &, const MCValue &, uint64_t, + bool) const override; std::optional<MCFixupKind> getFixupKind(StringRef Name) const override; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp index 83ecf805489c..972bee5a0aa7 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp @@ -34,6 +34,10 @@ static cl::opt<bool> cl::desc("Disable the emission of assembler pseudo instructions"), cl::init(false), cl::Hidden); +static cl::opt<bool> EmitX8AsFP("riscv-emit-x8-as-fp", + cl::desc("Emit x8 as fp instead of s0"), + cl::init(false), cl::Hidden); + // Print architectural register names rather than the ABI names (such as x2 // instead of sp). // TODO: Make RISCVInstPrinter::getRegisterName non-static so that this can a @@ -54,6 +58,11 @@ bool RISCVInstPrinter::applyTargetSpecificCLOption(StringRef Opt) { ArchRegNames = true; return true; } + if (Opt == "emit-x8-as-fp") { + if (!ArchRegNames) + EmitX8AsFP = true; + return true; + } return false; } @@ -311,6 +320,13 @@ void RISCVInstPrinter::printVMaskReg(const MCInst *MI, unsigned OpNo, } const char *RISCVInstPrinter::getRegisterName(MCRegister Reg) { + // When PrintAliases is enabled, and EmitX8AsFP is enabled, x8 will be printed + // as fp instead of s0. Note that these similar registers are not replaced: + // - X8_H: used for f16 register in zhinx + // - X8_W: used for f32 register in zfinx + // - X8_X9: used for GPR Pair + if (!ArchRegNames && EmitX8AsFP && Reg == RISCV::X8) + return "fp"; return getRegisterName(Reg, ArchRegNames ? 
RISCV::NoRegAltName : RISCV::ABIRegAltName); } diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp index 53e88aa48556..86702bbe58f0 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp @@ -125,8 +125,7 @@ bool SPIRVExtensionsParser::parse(cl::Option &O, StringRef ArgName, if (Token.starts_with("+")) { EnabledExtensions.insert(NameValuePair->second); } else if (EnabledExtensions.count(NameValuePair->second)) { - if (std::find(Tokens.begin(), Tokens.end(), "+" + ExtensionName.str()) != - Tokens.end()) + if (llvm::is_contained(Tokens, "+" + ExtensionName.str())) return O.error( "Extension cannot be allowed and disallowed at the same time: " + ExtensionName.str()); diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp index 156b40eb43a3..68b24dbe9f00 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp @@ -55,7 +55,7 @@ void WebAssemblyAsmTypeCheck::funcDecl(const wasm::WasmSignature &Sig) { void WebAssemblyAsmTypeCheck::localDecl( const SmallVectorImpl<wasm::ValType> &Locals) { - LocalTypes.insert(LocalTypes.end(), Locals.begin(), Locals.end()); + llvm::append_range(LocalTypes, Locals); } void WebAssemblyAsmTypeCheck::dumpTypeStack(Twine Msg) { @@ -357,8 +357,7 @@ bool WebAssemblyAsmTypeCheck::checkTryTable(SMLoc ErrorLoc, Opcode == wasm::WASM_OPCODE_CATCH_REF) { if (!getSignature(ErrorLoc, Inst.getOperand(OpIdx++), wasm::WASM_SYMBOL_TYPE_TAG, Sig)) - SentTypes.insert(SentTypes.end(), Sig->Params.begin(), - Sig->Params.end()); + llvm::append_range(SentTypes, Sig->Params); else Error = true; } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 767818107de8..cb23487e6fbe 100644 --- 
a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -177,11 +177,9 @@ public: bool mayNeedRelaxation(const MCInst &Inst, const MCSubtargetInfo &STI) const override; - bool fixupNeedsRelaxationAdvanced(const MCAssembler &Asm, - const MCFixup &Fixup, bool Resolved, - uint64_t Value, - const MCRelaxableFragment *DF, - const bool WasForced) const override; + bool fixupNeedsRelaxationAdvanced(const MCAssembler &, + const MCFixup &, const MCValue &, uint64_t, + bool) const override; void relaxInstruction(MCInst &Inst, const MCSubtargetInfo &STI) const override; @@ -731,22 +729,22 @@ bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI, MI.getOperand(MI.getNumOperands() - 1 - SkipOperands).isExpr()); } -bool X86AsmBackend::fixupNeedsRelaxationAdvanced(const MCAssembler &Asm, +bool X86AsmBackend::fixupNeedsRelaxationAdvanced(const MCAssembler &, const MCFixup &Fixup, - bool Resolved, uint64_t Value, - const MCRelaxableFragment *DF, - const bool WasForced) const { + const MCValue &Target, + uint64_t Value, + bool Resolved) const { // If resolved, relax if the value is too big for a (signed) i8. + // + // Currently, `jmp local@plt` relaxes JMP even if the offset is small, + // different from gas. if (Resolved) - return !isInt<8>(Value); + return !isInt<8>(Value) || Target.getSpecifier(); // Otherwise, relax unless there is a @ABS8 specifier. 
- if (Fixup.getKind() == FK_Data_1) { - MCValue Target; - if (Fixup.getValue()->evaluateAsRelocatable(Target, &Asm) && - Target.getAddSym() && Target.getSpecifier() == X86MCExpr::VK_ABS8) - return false; - } + if (Fixup.getKind() == FK_Data_1 && Target.getAddSym() && + Target.getSpecifier() == X86MCExpr::VK_ABS8) + return false; return true; } diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 38761e1fd7ee..577428cad6d6 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -338,7 +338,7 @@ def FeatureAVX10_1 : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true", "Support AVX10.1 up to 256-bit instruction", [FeatureCDI, FeatureVBMI, FeatureIFMA, FeatureVNNI, FeatureBF16, FeatureVPOPCNTDQ, FeatureVBMI2, FeatureBITALG, - FeatureVAES, FeatureVPCLMULQDQ, FeatureFP16]>; + FeatureFP16]>; def FeatureAVX10_1_512 : SubtargetFeature<"avx10.1-512", "HasAVX10_1_512", "true", "Support AVX10.1 up to 512-bit instruction", [FeatureAVX10_1, FeatureEVEX512]>; diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index e4b7ed7cf9b6..2ae6dd6b3d1e 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -637,8 +637,7 @@ constexpr FeatureBitset ImpliedFeaturesAVXVNNI = FeatureAVX2; constexpr FeatureBitset ImpliedFeaturesAVX10_1 = FeatureAVX512CD | FeatureAVX512VBMI | FeatureAVX512IFMA | FeatureAVX512VNNI | FeatureAVX512BF16 | FeatureAVX512VPOPCNTDQ | - FeatureAVX512VBMI2 | FeatureAVX512BITALG | FeatureVAES | FeatureVPCLMULQDQ | - FeatureAVX512FP16; + FeatureAVX512VBMI2 | FeatureAVX512BITALG | FeatureAVX512FP16; constexpr FeatureBitset ImpliedFeaturesAVX10_1_512 = FeatureAVX10_1 | FeatureEVEX512; constexpr FeatureBitset ImpliedFeaturesAVX10_2 = FeatureAVX10_1; diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index df1f6fddeba6..a5e0251277d8 100644 --- 
a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -3807,8 +3807,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones( // Make sure we don't pick a previously existing caller edge of this // Node, which would be processed on a different iteration of the // outer loop over the saved CallerEdges. - if (std::find(CallerEdges.begin(), CallerEdges.end(), E) != - CallerEdges.end()) + if (llvm::is_contained(CallerEdges, E)) continue; // The CallerAllocTypeForAlloc and CalleeEdgeAllocTypesForCallerEdge // are updated further below for all cases where we just invoked diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 19bf81137aab..f1b225c0f238 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2645,7 +2645,8 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { !Builder.GetInsertBlock()->getParent()->hasFnAttribute( Attribute::NoImplicitFloat)) { Type *EltTy = CastOp->getType()->getScalarType(); - if (EltTy->isFloatingPointTy() && EltTy->isIEEE()) { + if (EltTy->isFloatingPointTy() && + APFloat::hasSignBitInMSB(EltTy->getFltSemantics())) { Value *FAbs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, CastOp); return new BitCastInst(FAbs, I.getType()); } @@ -4058,7 +4059,8 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { !Builder.GetInsertBlock()->getParent()->hasFnAttribute( Attribute::NoImplicitFloat)) { Type *EltTy = CastOp->getType()->getScalarType(); - if (EltTy->isFloatingPointTy() && EltTy->isIEEE()) { + if (EltTy->isFloatingPointTy() && + APFloat::hasSignBitInMSB(EltTy->getFltSemantics())) { Value *FAbs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, CastOp); Value *FNegFAbs = Builder.CreateFNeg(FAbs); return new BitCastInst(FNegFAbs, I.getType()); @@ -4860,7 +4862,8 @@ Instruction 
*InstCombinerImpl::visitXor(BinaryOperator &I) { !Builder.GetInsertBlock()->getParent()->hasFnAttribute( Attribute::NoImplicitFloat)) { Type *EltTy = CastOp->getType()->getScalarType(); - if (EltTy->isFloatingPointTy() && EltTy->isIEEE()) { + if (EltTy->isFloatingPointTy() && + APFloat::hasSignBitInMSB(EltTy->getFltSemantics())) { Value *FNeg = Builder.CreateFNeg(CastOp); return new BitCastInst(FNeg, I.getType()); } diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index d872a381050c..889b43a843be 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -2294,10 +2294,14 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) { AliasSetTracker AST(BatchAA); auto IsPotentiallyPromotable = [L](const Instruction *I) { - if (const auto *SI = dyn_cast<StoreInst>(I)) - return L->isLoopInvariant(SI->getPointerOperand()); - if (const auto *LI = dyn_cast<LoadInst>(I)) - return L->isLoopInvariant(LI->getPointerOperand()); + if (const auto *SI = dyn_cast<StoreInst>(I)) { + const Value *PtrOp = SI->getPointerOperand(); + return !isa<ConstantData>(PtrOp) && L->isLoopInvariant(PtrOp); + } + if (const auto *LI = dyn_cast<LoadInst>(I)) { + const Value *PtrOp = LI->getPointerOperand(); + return !isa<ConstantData>(PtrOp) && L->isLoopInvariant(PtrOp); + } return false; }; diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp index 60619dbe2f58..950344722b5c 100644 --- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -754,9 +754,7 @@ public: /// Erase \p Inst from both ShapeMap (if an entry exists) and erase \p Inst /// itself. 
void eraseFromParentAndRemoveFromShapeMap(Instruction *Inst) { - auto Iter = ShapeMap.find(Inst); - if (Iter != ShapeMap.end()) - ShapeMap.erase(Iter); + ShapeMap.erase(Inst); Inst->eraseFromParent(); } diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 2f5cf45a1d3d..f98a69380464 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -3287,20 +3287,21 @@ static void computeLiveInValues(DominatorTree &DT, Function &F, // Seed the liveness for each individual block for (BasicBlock &BB : F) { Data.KillSet[&BB] = computeKillSet(&BB, GC); - Data.LiveSet[&BB].clear(); - computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB], GC); + auto &LiveSet = Data.LiveSet[&BB]; + LiveSet.clear(); + computeLiveInValues(BB.rbegin(), BB.rend(), LiveSet, GC); #ifndef NDEBUG for (Value *Kill : Data.KillSet[&BB]) assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill"); #endif - Data.LiveOut[&BB] = SetVector<Value *>(); - computeLiveOutSeed(&BB, Data.LiveOut[&BB], GC); - Data.LiveIn[&BB] = Data.LiveSet[&BB]; - Data.LiveIn[&BB].set_union(Data.LiveOut[&BB]); - Data.LiveIn[&BB].set_subtract(Data.KillSet[&BB]); - if (!Data.LiveIn[&BB].empty()) + auto &Out = Data.LiveOut[&BB] = SetVector<Value *>(); + computeLiveOutSeed(&BB, Out, GC); + auto &In = Data.LiveIn[&BB] = Data.LiveSet[&BB]; + In.set_union(Out); + In.set_subtract(Data.KillSet[&BB]); + if (!In.empty()) Worklist.insert_range(predecessors(&BB)); } diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 41bf202230e2..e25ec6c3b2a5 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -111,20 +111,23 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, Value *Ret = nullptr; - // Check to see if there is already a 
cast! - for (User *U : V->users()) { - if (U->getType() != Ty) - continue; - CastInst *CI = dyn_cast<CastInst>(U); - if (!CI || CI->getOpcode() != Op) - continue; + if (!isa<Constant>(V)) { + // Check to see if there is already a cast! + for (User *U : V->users()) { + if (U->getType() != Ty) + continue; + CastInst *CI = dyn_cast<CastInst>(U); + if (!CI || CI->getOpcode() != Op) + continue; - // Found a suitable cast that is at IP or comes before IP. Use it. Note that - // the cast must also properly dominate the Builder's insertion point. - if (IP->getParent() == CI->getParent() && &*BIP != CI && - (&*IP == CI || CI->comesBefore(&*IP))) { - Ret = CI; - break; + // Found a suitable cast that is at IP or comes before IP. Use it. Note + // that the cast must also properly dominate the Builder's insertion + // point. + if (IP->getParent() == CI->getParent() && &*BIP != CI && + (&*IP == CI || CI->comesBefore(&*IP))) { + Ret = CI; + break; + } } } diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index eac7e7c209c9..7f53aa7d4f73 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -4087,9 +4087,7 @@ bool llvm::foldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); - if (!Cond || - (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond) && - !isa<SelectInst>(Cond)) || + if (!Cond || !isa<CmpInst, BinaryOperator, SelectInst, TruncInst>(Cond) || Cond->getParent() != BB || !Cond->hasOneUse()) return false; diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 2d0027d97601..4e37c587dc97 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -3002,6 +3002,9 @@ Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, bool IsSin, IRBuilderBa return nullptr; Value *Arg = CI->getArgOperand(0); + if 
(isa<ConstantData>(Arg)) + return nullptr; + SmallVector<CallInst *, 1> SinCalls; SmallVector<CallInst *, 1> CosCalls; SmallVector<CallInst *, 1> SinCosCalls; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0acca63503af..a28cda9fe62b 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1373,21 +1373,6 @@ public: return false; } - /// Returns true if we're required to use a scalar epilogue for at least - /// the final iteration of the original loop for all VFs in \p Range. - /// A scalar epilogue must either be required for all VFs in \p Range or for - /// none. - bool requiresScalarEpilogue(VFRange Range) const { - auto RequiresScalarEpilogue = [this](ElementCount VF) { - return requiresScalarEpilogue(VF.isVector()); - }; - bool IsRequired = all_of(Range, RequiresScalarEpilogue); - assert( - (IsRequired || none_of(Range, RequiresScalarEpilogue)) && - "all VFs in range must agree on whether a scalar epilogue is required"); - return IsRequired; - } - /// Returns true if a scalar epilogue is not allowed due to optsize or a /// loop hint annotation. bool isScalarEpilogueAllowed() const { diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index 2cd23efcf3ea..7934c47ee5ba 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -84,13 +84,15 @@ struct deferredval_ty { /// whichever value m_VPValue(X) populated. inline deferredval_ty m_Deferred(VPValue *const &V) { return V; } -/// Match a specified integer value or vector of all elements of that -/// value. \p BitWidth optionally specifies the bitwidth the matched constant -/// must have. If it is 0, the matched constant can have any bitwidth. 
-template <unsigned BitWidth = 0> struct specific_intval { - APInt Val; +/// Match an integer constant or vector of constants if Pred::isValue returns +/// true for the APInt. \p BitWidth optionally specifies the bitwidth the +/// matched constant must have. If it is 0, the matched constant can have any +/// bitwidth. +template <typename Pred, unsigned BitWidth = 0> struct int_pred_ty { + Pred P; - specific_intval(APInt V) : Val(std::move(V)) {} + int_pred_ty(Pred P) : P(std::move(P)) {} + int_pred_ty() : P() {} bool match(VPValue *VPV) const { if (!VPV->isLiveIn()) @@ -108,17 +110,45 @@ template <unsigned BitWidth = 0> struct specific_intval { if (BitWidth != 0 && CI->getBitWidth() != BitWidth) return false; - return APInt::isSameValue(CI->getValue(), Val); + return P.isValue(CI->getValue()); } }; +/// Match a specified integer value or vector of all elements of that +/// value. \p BitWidth optionally specifies the bitwidth the matched constant +/// must have. If it is 0, the matched constant can have any bitwidth. +struct is_specific_int { + APInt Val; + + is_specific_int(APInt Val) : Val(std::move(Val)) {} + + bool isValue(const APInt &C) const { return APInt::isSameValue(Val, C); } +}; + +template <unsigned Bitwidth = 0> +using specific_intval = int_pred_ty<is_specific_int, Bitwidth>; + inline specific_intval<0> m_SpecificInt(uint64_t V) { - return specific_intval<0>(APInt(64, V)); + return specific_intval<0>(is_specific_int(APInt(64, V))); } -inline specific_intval<1> m_False() { return specific_intval<1>(APInt(64, 0)); } +inline specific_intval<1> m_False() { + return specific_intval<1>(is_specific_int(APInt(64, 0))); +} -inline specific_intval<1> m_True() { return specific_intval<1>(APInt(64, 1)); } +inline specific_intval<1> m_True() { + return specific_intval<1>(is_specific_int(APInt(64, 1))); +} + +struct is_all_ones { + bool isValue(const APInt &C) const { return C.isAllOnes(); } +}; + +/// Match an integer or vector with all bits set. 
+/// For vectors, this includes constants with undefined elements. +inline int_pred_ty<is_all_ones> m_AllOnes() { + return int_pred_ty<is_all_ones>(); +} /// Matching combinators template <typename LTy, typename RTy> struct match_combine_or { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 67a55aa67c97..dc9f953f7447 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -994,6 +994,14 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return; } + // OR x, 1 -> 1. + if (match(&R, m_c_BinaryOr(m_VPValue(X), m_AllOnes()))) { + R.getVPSingleValue()->replaceAllUsesWith( + R.getOperand(0) == X ? R.getOperand(1) : R.getOperand(0)); + R.eraseFromParent(); + return; + } + if (match(&R, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X)))) return R.getVPSingleValue()->replaceAllUsesWith(X); diff --git a/llvm/test/Analysis/ScalarEvolution/pr135531.ll b/llvm/test/Analysis/ScalarEvolution/pr135531.ll new file mode 100644 index 000000000000..e172d56d3a51 --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/pr135531.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -disable-output -passes='print<scalar-evolution>' < %s 2>&1 | FileCheck %s + +define i32 @pr135511(i32 %x) { +; CHECK-LABEL: 'pr135511' +; CHECK-NEXT: Classifying expressions for: @pr135511 +; CHECK-NEXT: %and = and i32 %x, 16382 +; CHECK-NEXT: --> (2 * (zext i13 (trunc i32 (%x /u 2) to i13) to i32))<nuw><nsw> U: [0,16383) S: [0,16383) +; CHECK-NEXT: %neg = sub nsw i32 0, %and +; CHECK-NEXT: --> (-2 * (zext i13 (trunc i32 (%x /u 2) to i13) to i32))<nsw> U: [0,-1) S: [-16382,1) +; CHECK-NEXT: %res = and i32 %neg, 268431360 +; CHECK-NEXT: --> (4096 * (zext i16 (trunc i32 ((-1 * (zext i13 (trunc i32 (%x /u 2) to i13) to i32))<nsw> /u 2048) to i16) to i32))<nuw><nsw> U: 
[0,268431361) S: [0,268431361) +; CHECK-NEXT: Determining loop execution counts for: @pr135511 +; + %and = and i32 %x, 16382 + %neg = sub nsw i32 0, %and + %res = and i32 %neg, 268431360 + ret i32 %res +} diff --git a/llvm/test/CodeGen/AArch64/aarch64-sve-fill-spill-pair.ll b/llvm/test/CodeGen/AArch64/aarch64-sve-fill-spill-pair.ll new file mode 100644 index 000000000000..503ead4eba2d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-sve-fill-spill-pair.ll @@ -0,0 +1,283 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=128 -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=aarch64_be-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=128 -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s --check-prefixes=CHECK-BE +; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -mattr=+sve,ldp-aligned-only -aarch64-sve-vector-bits-min=128 -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s --check-prefixes=CHECK-LDPALIGNEDONLY +; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -mattr=+sve,stp-aligned-only -aarch64-sve-vector-bits-min=128 -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s --check-prefixes=CHECK-STPALIGNEDONLY +; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK-OFF +; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256 < %s | FileCheck %s --check-prefixes=CHECK-OFF + +define void @nxv16i8(ptr %ldptr, ptr %stptr) { +; CHECK-LABEL: nxv16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: stp q0, q1, [x1] +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: nxv16i8: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ptrue p0.b +; CHECK-BE-NEXT: ld1b { z0.b }, p0/z, [x0] +; CHECK-BE-NEXT: ld1b { z1.b }, p0/z, [x0, #1, mul 
vl] +; CHECK-BE-NEXT: st1b { z0.b }, p0, [x1] +; CHECK-BE-NEXT: st1b { z1.b }, p0, [x1, #1, mul vl] +; CHECK-BE-NEXT: ret +; +; CHECK-LDPALIGNEDONLY-LABEL: nxv16i8: +; CHECK-LDPALIGNEDONLY: // %bb.0: +; CHECK-LDPALIGNEDONLY-NEXT: ldr z0, [x0] +; CHECK-LDPALIGNEDONLY-NEXT: ldr z1, [x0, #1, mul vl] +; CHECK-LDPALIGNEDONLY-NEXT: stp q0, q1, [x1] +; CHECK-LDPALIGNEDONLY-NEXT: ret +; +; CHECK-STPALIGNEDONLY-LABEL: nxv16i8: +; CHECK-STPALIGNEDONLY: // %bb.0: +; CHECK-STPALIGNEDONLY-NEXT: ldp q0, q1, [x0] +; CHECK-STPALIGNEDONLY-NEXT: str z0, [x1] +; CHECK-STPALIGNEDONLY-NEXT: str z1, [x1, #1, mul vl] +; CHECK-STPALIGNEDONLY-NEXT: ret +; +; CHECK-OFF-LABEL: nxv16i8: +; CHECK-OFF: // %bb.0: +; CHECK-OFF-NEXT: ldr z0, [x0] +; CHECK-OFF-NEXT: ldr z1, [x0, #1, mul vl] +; CHECK-OFF-NEXT: str z0, [x1] +; CHECK-OFF-NEXT: str z1, [x1, #1, mul vl] +; CHECK-OFF-NEXT: ret + %vscale = tail call i64 @llvm.vscale() + %vl = shl nuw nsw i64 %vscale, 4 + %ldptr2 = getelementptr inbounds nuw i8, ptr %ldptr, i64 %vl + %stptr2 = getelementptr inbounds nuw i8, ptr %stptr, i64 %vl + %ld1 = load <vscale x 16 x i8>, ptr %ldptr, align 1 + %ld2 = load <vscale x 16 x i8>, ptr %ldptr2, align 1 + store <vscale x 16 x i8> %ld1, ptr %stptr, align 1 + store <vscale x 16 x i8> %ld2, ptr %stptr2, align 1 + ret void +} + +define void @nxv16i8_max_range(ptr %ldptr, ptr %stptr) { +; CHECK-LABEL: nxv16i8_max_range: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q0, q1, [x0, #-1024] +; CHECK-NEXT: stp q0, q1, [x1, #1008] +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: nxv16i8_max_range: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: rdvl x8, #1 +; CHECK-BE-NEXT: mov x9, #-1008 // =0xfffffffffffffc10 +; CHECK-BE-NEXT: mov x10, #-1024 // =0xfffffffffffffc00 +; CHECK-BE-NEXT: lsr x8, x8, #4 +; CHECK-BE-NEXT: mov w11, #1008 // =0x3f0 +; CHECK-BE-NEXT: mov w12, #1024 // =0x400 +; CHECK-BE-NEXT: ptrue p0.b +; CHECK-BE-NEXT: mul x9, x8, x9 +; CHECK-BE-NEXT: mul x10, x8, x10 +; CHECK-BE-NEXT: mul x11, x8, x11 +; CHECK-BE-NEXT: ld1b { z1.b 
}, p0/z, [x0, x9] +; CHECK-BE-NEXT: mul x8, x8, x12 +; CHECK-BE-NEXT: ld1b { z0.b }, p0/z, [x0, x10] +; CHECK-BE-NEXT: st1b { z0.b }, p0, [x1, x11] +; CHECK-BE-NEXT: st1b { z1.b }, p0, [x1, x8] +; CHECK-BE-NEXT: ret +; +; CHECK-LDPALIGNEDONLY-LABEL: nxv16i8_max_range: +; CHECK-LDPALIGNEDONLY: // %bb.0: +; CHECK-LDPALIGNEDONLY-NEXT: ldr z0, [x0, #-64, mul vl] +; CHECK-LDPALIGNEDONLY-NEXT: ldr z1, [x0, #-63, mul vl] +; CHECK-LDPALIGNEDONLY-NEXT: stp q0, q1, [x1, #1008] +; CHECK-LDPALIGNEDONLY-NEXT: ret +; +; CHECK-STPALIGNEDONLY-LABEL: nxv16i8_max_range: +; CHECK-STPALIGNEDONLY: // %bb.0: +; CHECK-STPALIGNEDONLY-NEXT: ldp q0, q1, [x0, #-1024] +; CHECK-STPALIGNEDONLY-NEXT: str z0, [x1, #63, mul vl] +; CHECK-STPALIGNEDONLY-NEXT: str z1, [x1, #64, mul vl] +; CHECK-STPALIGNEDONLY-NEXT: ret +; +; CHECK-OFF-LABEL: nxv16i8_max_range: +; CHECK-OFF: // %bb.0: +; CHECK-OFF-NEXT: ldr z0, [x0, #-64, mul vl] +; CHECK-OFF-NEXT: ldr z1, [x0, #-63, mul vl] +; CHECK-OFF-NEXT: str z0, [x1, #63, mul vl] +; CHECK-OFF-NEXT: str z1, [x1, #64, mul vl] +; CHECK-OFF-NEXT: ret + %vscale = tail call i64 @llvm.vscale() + %ldoff1 = mul i64 %vscale, -1024 + %ldoff2 = mul i64 %vscale, -1008 + %stoff1 = mul i64 %vscale, 1008 + %stoff2 = mul i64 %vscale, 1024 + %ldptr1 = getelementptr inbounds nuw i8, ptr %ldptr, i64 %ldoff1 + %ldptr2 = getelementptr inbounds nuw i8, ptr %ldptr, i64 %ldoff2 + %stptr1 = getelementptr inbounds nuw i8, ptr %stptr, i64 %stoff1 + %stptr2 = getelementptr inbounds nuw i8, ptr %stptr, i64 %stoff2 + %ld1 = load <vscale x 16 x i8>, ptr %ldptr1, align 1 + %ld2 = load <vscale x 16 x i8>, ptr %ldptr2, align 1 + store <vscale x 16 x i8> %ld1, ptr %stptr1, align 1 + store <vscale x 16 x i8> %ld2, ptr %stptr2, align 1 + ret void +} + +define void @nxv16i8_outside_range(ptr %ldptr, ptr %stptr) { +; CHECK-LABEL: nxv16i8_outside_range: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr z0, [x0, #-65, mul vl] +; CHECK-NEXT: ldr z1, [x0, #-64, mul vl] +; CHECK-NEXT: str z0, [x1, #64, mul vl] +; 
CHECK-NEXT: str z1, [x1, #65, mul vl] +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: nxv16i8_outside_range: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: rdvl x8, #1 +; CHECK-BE-NEXT: mov x9, #-1040 // =0xfffffffffffffbf0 +; CHECK-BE-NEXT: mov x10, #-1024 // =0xfffffffffffffc00 +; CHECK-BE-NEXT: lsr x8, x8, #4 +; CHECK-BE-NEXT: mov w11, #1024 // =0x400 +; CHECK-BE-NEXT: mov w12, #1040 // =0x410 +; CHECK-BE-NEXT: ptrue p0.b +; CHECK-BE-NEXT: mul x9, x8, x9 +; CHECK-BE-NEXT: mul x10, x8, x10 +; CHECK-BE-NEXT: mul x11, x8, x11 +; CHECK-BE-NEXT: ld1b { z0.b }, p0/z, [x0, x9] +; CHECK-BE-NEXT: mul x8, x8, x12 +; CHECK-BE-NEXT: ld1b { z1.b }, p0/z, [x0, x10] +; CHECK-BE-NEXT: st1b { z0.b }, p0, [x1, x11] +; CHECK-BE-NEXT: st1b { z1.b }, p0, [x1, x8] +; CHECK-BE-NEXT: ret +; +; CHECK-LDPALIGNEDONLY-LABEL: nxv16i8_outside_range: +; CHECK-LDPALIGNEDONLY: // %bb.0: +; CHECK-LDPALIGNEDONLY-NEXT: ldr z0, [x0, #-65, mul vl] +; CHECK-LDPALIGNEDONLY-NEXT: ldr z1, [x0, #-64, mul vl] +; CHECK-LDPALIGNEDONLY-NEXT: str z0, [x1, #64, mul vl] +; CHECK-LDPALIGNEDONLY-NEXT: str z1, [x1, #65, mul vl] +; CHECK-LDPALIGNEDONLY-NEXT: ret +; +; CHECK-STPALIGNEDONLY-LABEL: nxv16i8_outside_range: +; CHECK-STPALIGNEDONLY: // %bb.0: +; CHECK-STPALIGNEDONLY-NEXT: ldr z0, [x0, #-65, mul vl] +; CHECK-STPALIGNEDONLY-NEXT: ldr z1, [x0, #-64, mul vl] +; CHECK-STPALIGNEDONLY-NEXT: str z0, [x1, #64, mul vl] +; CHECK-STPALIGNEDONLY-NEXT: str z1, [x1, #65, mul vl] +; CHECK-STPALIGNEDONLY-NEXT: ret +; +; CHECK-OFF-LABEL: nxv16i8_outside_range: +; CHECK-OFF: // %bb.0: +; CHECK-OFF-NEXT: ldr z0, [x0, #-65, mul vl] +; CHECK-OFF-NEXT: ldr z1, [x0, #-64, mul vl] +; CHECK-OFF-NEXT: str z0, [x1, #64, mul vl] +; CHECK-OFF-NEXT: str z1, [x1, #65, mul vl] +; CHECK-OFF-NEXT: ret + %vscale = tail call i64 @llvm.vscale() + %ldoff1 = mul i64 %vscale, -1040 + %ldoff2 = mul i64 %vscale, -1024 + %stoff1 = mul i64 %vscale, 1024 + %stoff2 = mul i64 %vscale, 1040 + %ldptr1 = getelementptr inbounds nuw i8, ptr %ldptr, i64 %ldoff1 + %ldptr2 
= getelementptr inbounds nuw i8, ptr %ldptr, i64 %ldoff2 + %stptr1 = getelementptr inbounds nuw i8, ptr %stptr, i64 %stoff1 + %stptr2 = getelementptr inbounds nuw i8, ptr %stptr, i64 %stoff2 + %ld1 = load <vscale x 16 x i8>, ptr %ldptr1, align 1 + %ld2 = load <vscale x 16 x i8>, ptr %ldptr2, align 1 + store <vscale x 16 x i8> %ld1, ptr %stptr1, align 1 + store <vscale x 16 x i8> %ld2, ptr %stptr2, align 1 + ret void +} + +define void @nxv16i8_2vl_stride(ptr %ldptr, ptr %stptr) { +; CHECK-LABEL: nxv16i8_2vl_stride: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: ldr z1, [x0, #2, mul vl] +; CHECK-NEXT: str z0, [x1] +; CHECK-NEXT: str z1, [x1, #2, mul vl] +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: nxv16i8_2vl_stride: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ptrue p0.b +; CHECK-BE-NEXT: ld1b { z0.b }, p0/z, [x0] +; CHECK-BE-NEXT: ld1b { z1.b }, p0/z, [x0, #2, mul vl] +; CHECK-BE-NEXT: st1b { z0.b }, p0, [x1] +; CHECK-BE-NEXT: st1b { z1.b }, p0, [x1, #2, mul vl] +; CHECK-BE-NEXT: ret +; +; CHECK-LDPALIGNEDONLY-LABEL: nxv16i8_2vl_stride: +; CHECK-LDPALIGNEDONLY: // %bb.0: +; CHECK-LDPALIGNEDONLY-NEXT: ldr z0, [x0] +; CHECK-LDPALIGNEDONLY-NEXT: ldr z1, [x0, #2, mul vl] +; CHECK-LDPALIGNEDONLY-NEXT: str z0, [x1] +; CHECK-LDPALIGNEDONLY-NEXT: str z1, [x1, #2, mul vl] +; CHECK-LDPALIGNEDONLY-NEXT: ret +; +; CHECK-STPALIGNEDONLY-LABEL: nxv16i8_2vl_stride: +; CHECK-STPALIGNEDONLY: // %bb.0: +; CHECK-STPALIGNEDONLY-NEXT: ldr z0, [x0] +; CHECK-STPALIGNEDONLY-NEXT: ldr z1, [x0, #2, mul vl] +; CHECK-STPALIGNEDONLY-NEXT: str z0, [x1] +; CHECK-STPALIGNEDONLY-NEXT: str z1, [x1, #2, mul vl] +; CHECK-STPALIGNEDONLY-NEXT: ret +; +; CHECK-OFF-LABEL: nxv16i8_2vl_stride: +; CHECK-OFF: // %bb.0: +; CHECK-OFF-NEXT: ldr z0, [x0] +; CHECK-OFF-NEXT: ldr z1, [x0, #2, mul vl] +; CHECK-OFF-NEXT: str z0, [x1] +; CHECK-OFF-NEXT: str z1, [x1, #2, mul vl] +; CHECK-OFF-NEXT: ret + %vscale = tail call i64 @llvm.vscale() + %vl = shl nuw nsw i64 %vscale, 5 + %ldptr2 = getelementptr inbounds 
nuw i8, ptr %ldptr, i64 %vl + %stptr2 = getelementptr inbounds nuw i8, ptr %stptr, i64 %vl + %ld1 = load <vscale x 16 x i8>, ptr %ldptr, align 1 + %ld2 = load <vscale x 16 x i8>, ptr %ldptr2, align 1 + store <vscale x 16 x i8> %ld1, ptr %stptr, align 1 + store <vscale x 16 x i8> %ld2, ptr %stptr2, align 1 + ret void +} + +define void @nxv2f64_32b_aligned(ptr %ldptr, ptr %stptr) { +; CHECK-LABEL: nxv2f64_32b_aligned: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: stp q0, q1, [x1] +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: nxv2f64_32b_aligned: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ptrue p0.d +; CHECK-BE-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-BE-NEXT: ld1d { z1.d }, p0/z, [x0, #1, mul vl] +; CHECK-BE-NEXT: st1d { z0.d }, p0, [x1] +; CHECK-BE-NEXT: st1d { z1.d }, p0, [x1, #1, mul vl] +; CHECK-BE-NEXT: ret +; +; CHECK-LDPALIGNEDONLY-LABEL: nxv2f64_32b_aligned: +; CHECK-LDPALIGNEDONLY: // %bb.0: +; CHECK-LDPALIGNEDONLY-NEXT: ldp q0, q1, [x0] +; CHECK-LDPALIGNEDONLY-NEXT: stp q0, q1, [x1] +; CHECK-LDPALIGNEDONLY-NEXT: ret +; +; CHECK-STPALIGNEDONLY-LABEL: nxv2f64_32b_aligned: +; CHECK-STPALIGNEDONLY: // %bb.0: +; CHECK-STPALIGNEDONLY-NEXT: ldp q0, q1, [x0] +; CHECK-STPALIGNEDONLY-NEXT: stp q0, q1, [x1] +; CHECK-STPALIGNEDONLY-NEXT: ret +; +; CHECK-OFF-LABEL: nxv2f64_32b_aligned: +; CHECK-OFF: // %bb.0: +; CHECK-OFF-NEXT: ldr z0, [x0] +; CHECK-OFF-NEXT: ldr z1, [x0, #1, mul vl] +; CHECK-OFF-NEXT: str z0, [x1] +; CHECK-OFF-NEXT: str z1, [x1, #1, mul vl] +; CHECK-OFF-NEXT: ret + %vscale = tail call i64 @llvm.vscale() + %vl = shl nuw nsw i64 %vscale, 4 + %ldptr2 = getelementptr inbounds nuw i8, ptr %ldptr, i64 %vl + %stptr2 = getelementptr inbounds nuw i8, ptr %stptr, i64 %vl + %ld1 = load <vscale x 2 x double>, ptr %ldptr, align 32 + %ld2 = load <vscale x 2 x double>, ptr %ldptr2, align 32 + store <vscale x 2 x double> %ld1, ptr %stptr, align 32 + store <vscale x 2 x double> %ld2, ptr %stptr2, align 32 + ret void +} diff --git 
a/llvm/test/CodeGen/AArch64/branch-relax-block-size.mir b/llvm/test/CodeGen/AArch64/branch-relax-block-size.mir index 2684a550d921..6f07d1b2a6ea 100644 --- a/llvm/test/CodeGen/AArch64/branch-relax-block-size.mir +++ b/llvm/test/CodeGen/AArch64/branch-relax-block-size.mir @@ -1,5 +1,6 @@ # REQUIRES: asserts # RUN: llc -mtriple=aarch64--linux-gnu -run-pass=branch-relaxation -debug-only=branch-relaxation %s -o /dev/null 2>&1 | FileCheck %s +# RUN: llc -mtriple=aarch64--linux-gnu -passes=branch-relaxation -debug-only=branch-relaxation %s -o /dev/null 2>&1 | FileCheck %s # Ensure meta instructions (e.g. CFI_INSTRUCTION) don't contribute to the code # size of a basic block. diff --git a/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir index db88bf0044a5..000246ad8299 100644 --- a/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir +++ b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir @@ -1,6 +1,8 @@ # RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation -aarch64-b-offset-bits=64 -aarch64-tbz-offset-bits=9 -aarch64-cbz-offset-bits=9 %s -o - | FileCheck %s # RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation -aarch64-tbz-offset-bits=9 -aarch64-cbz-offset-bits=9 %s -o - | FileCheck --check-prefix=INDIRECT %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -passes branch-relaxation -aarch64-tbz-offset-bits=9 -aarch64-cbz-offset-bits=9 %s -o - | FileCheck --check-prefix=INDIRECT %s + --- | declare i32 @bar() declare i32 @baz() diff --git a/llvm/test/CodeGen/AArch64/peephole-orr.mir b/llvm/test/CodeGen/AArch64/peephole-orr.mir index 3431676438bd..f718328ecf2d 100644 --- a/llvm/test/CodeGen/AArch64/peephole-orr.mir +++ b/llvm/test/CodeGen/AArch64/peephole-orr.mir @@ -1,6 +1,49 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -run-pass=aarch64-mi-peephole-opt -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | 
FileCheck %s - +--- +name: copy_fpr128_gpr32 +body: | + bb.0: + liveins: $q0 + ; CHECK-LABEL: name: copy_fpr128_gpr32 + ; CHECK: liveins: $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]].ssub + ; CHECK-NEXT: [[FMOVSWr:%[0-9]+]]:gpr32 = FMOVSWr [[COPY1]] + %0:fpr128 = COPY $q0 + %1:gpr32 = COPY %0.ssub:fpr128 + %2:gpr32 = ORRWrs $wzr, killed %1:gpr32, 0 +... +--- +name: copy_fpr32_gpr32 +body: | + bb.0: + liveins: $s0 + ; CHECK-LABEL: name: copy_fpr32_gpr32 + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK-NEXT: [[FMOVSWr:%[0-9]+]]:gpr32 = FMOVSWr [[COPY]] + %0:fpr32 = COPY $s0 + %1:gpr32 = COPY %0:fpr32 + %2:gpr32 = ORRWrs $wzr, killed %1:gpr32, 0 +... +--- +name: copy_zpr_gpr32 +body: | + bb.0: + liveins: $z0 + ; CHECK-LABEL: name: copy_zpr_gpr32 + ; CHECK: liveins: $z0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:zpr = COPY $z0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]].ssub + ; CHECK-NEXT: [[FMOVSWr:%[0-9]+]]:gpr32 = FMOVSWr [[COPY1]] + %0:zpr = COPY $z0 + %1:gpr32 = COPY %0.ssub:zpr + %2:gpr32 = ORRWrs $wzr, killed %1:gpr32, 0 +... 
--- name: copy_multiple_uses tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AArch64/sve-vls-ldst-opt.mir b/llvm/test/CodeGen/AArch64/sve-vls-ldst-opt.mir new file mode 100644 index 000000000000..49453bc17891 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-vls-ldst-opt.mir @@ -0,0 +1,74 @@ +# RUN: llc -mtriple=aarch64-unknown-linux -mattr=+sve -aarch64-sve-vector-bits-min=128 -aarch64-sve-vector-bits-max=128 -run-pass=aarch64-ldst-opt -verify-machineinstrs %s -o - | FileCheck %s +--- +name: pair-sve-fill-spill +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + renamable $z0 = LDR_ZXI renamable $x0, 0 :: (load (<vscale x 1 x s128>)) + renamable $z1 = LDR_ZXI killed renamable $x0, 1 :: (load (<vscale x 1 x s128>)) + STR_ZXI killed renamable $z0, renamable $x1, 0 :: (store (<vscale x 1 x s128>)) + STR_ZXI killed renamable $z1, killed renamable $x1, 1 :: (store (<vscale x 1 x s128>)) + RET_ReallyLR +... +# CHECK-LABEL: name: pair-sve-fill-spill +# CHECK: $q0, $q1 = LDPQi renamable $x0, 0 :: (load (<vscale x 1 x s128>)) +# CHECK: STPQi killed $q0, killed $q1, renamable $x1, 0 :: (store (<vscale x 1 x s128>)) +--- +name: do-not-pair-sve-with-neon-scaled +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; SVE LDR + Neon LDR + renamable $z0 = LDR_ZXI renamable $x0, 0 :: (load (<vscale x 1 x s128>)) + renamable $q1 = LDRQui renamable $x0, 1 :: (load (s128)) + ; Neon LDR + SVE LDR + renamable $q2 = LDRQui renamable $x0, 3 :: (load (s128)) + renamable $z3 = LDR_ZXI renamable $x0, 4 :: (load (<vscale x 1 x s128>)) + ; SVE STR + Neon STR + STR_ZXI killed renamable $z0, renamable $x1, 0 :: (store (<vscale x 1 x s128>)) + STRQui killed renamable $q1, renamable $x1, 1 :: (store (s128)) + ; Neon STR + SVE STR + STRQui killed renamable $q2, renamable $x1, 3 :: (store (s128)) + STR_ZXI killed renamable $z3, renamable $x1, 4 :: (store (<vscale x 1 x s128>)) + RET_ReallyLR +... 
+# CHECK-LABEL: name: do-not-pair-sve-with-neon-scaled +# CHECK: renamable $z0 = LDR_ZXI renamable $x0, 0 :: (load (<vscale x 1 x s128>)) +# CHECK: renamable $q1 = LDRQui renamable $x0, 1 :: (load (s128)) +# CHECK: renamable $q2 = LDRQui renamable $x0, 3 :: (load (s128)) +# CHECK: renamable $z3 = LDR_ZXI renamable $x0, 4 :: (load (<vscale x 1 x s128>)) +# CHECK: STR_ZXI killed renamable $z0, renamable $x1, 0 :: (store (<vscale x 1 x s128>)) +# CHECK: STRQui killed renamable $q1, renamable $x1, 1 :: (store (s128)) +# CHECK: STRQui killed renamable $q2, renamable $x1, 3 :: (store (s128)) +# CHECK: STR_ZXI killed renamable $z3, renamable $x1, 4 :: (store (<vscale x 1 x s128>)) +--- +name: do-not-pair-sve-with-neon-unscaled +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; SVE LDR + Neon LDUR + renamable $z0 = LDR_ZXI renamable $x0, 0 :: (load (<vscale x 1 x s128>)) + renamable $q1 = LDURQi renamable $x0, 16 :: (load (s128)) + ; Neon LDUR + SVE LDR + renamable $q2 = LDURQi renamable $x0, 48 :: (load (s128)) + renamable $z3 = LDR_ZXI renamable $x0, 4 :: (load (<vscale x 1 x s128>)) + ; SVE STR + Neon STUR + STR_ZXI killed renamable $z0, renamable $x1, 0 :: (store (<vscale x 1 x s128>)) + STURQi killed renamable $q1, renamable $x1, 16 :: (store (s128)) + ; Neon STUR + SVE STR + STURQi killed renamable $q2, renamable $x1, 48 :: (store (s128)) + STR_ZXI killed renamable $z3, renamable $x1, 4 :: (store (<vscale x 1 x s128>)) + RET_ReallyLR +... 
+# CHECK-LABEL: name: do-not-pair-sve-with-neon-unscaled +# CHECK: renamable $z0 = LDR_ZXI renamable $x0, 0 :: (load (<vscale x 1 x s128>)) +# CHECK: renamable $q1 = LDURQi renamable $x0, 16 :: (load (s128)) +# CHECK: renamable $q2 = LDURQi renamable $x0, 48 :: (load (s128)) +# CHECK: renamable $z3 = LDR_ZXI renamable $x0, 4 :: (load (<vscale x 1 x s128>)) +# CHECK: STR_ZXI killed renamable $z0, renamable $x1, 0 :: (store (<vscale x 1 x s128>)) +# CHECK: STURQi killed renamable $q1, renamable $x1, 16 :: (store (s128)) +# CHECK: STURQi killed renamable $q2, renamable $x1, 48 :: (store (s128)) +# CHECK: STR_ZXI killed renamable $z3, renamable $x1, 4 :: (store (<vscale x 1 x s128>)) diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir b/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir index e4d9fbfb1705..474ba71b0eba 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir +++ b/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --amdgpu-s-branch-bits=5 -run-pass branch-relaxation %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --amdgpu-s-branch-bits=5 -passes=branch-relaxation %s -o - | FileCheck %s --- name: branch_no_terminators diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-non-byte-sizes.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-non-byte-sizes.ll new file mode 100644 index 000000000000..4095347d7862 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-non-byte-sizes.ll @@ -0,0 +1,204 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s + +; Check that types where the store/allocation sizes don't match the type size +; don't crash. 
+ + +define <7 x i9> @load_elem_i9_access_7xi9() { +; CHECK-LABEL: @load_elem_i9_access_7xi9( +; CHECK-NEXT: [[P:%.*]] = alloca <16 x i9>, align 1, addrspace(5) +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr addrspace(5) [[P]], i64 4 +; CHECK-NEXT: [[L:%.*]] = load <7 x i9>, ptr addrspace(5) [[G]], align 1 +; CHECK-NEXT: ret <7 x i9> [[L]] +; + %p = alloca <16 x i9>, align 1, addrspace(5) + %g = getelementptr i8, ptr addrspace(5) %p, i64 4 + %l = load <7 x i9>, ptr addrspace(5) %g, align 1 + ret <7 x i9> %l +} + +define <8 x i1> @load_elem_i1_access_8xi1() { +; CHECK-LABEL: @load_elem_i1_access_8xi1( +; CHECK-NEXT: [[P:%.*]] = alloca <16 x i1>, align 1, addrspace(5) +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr addrspace(5) [[P]], i64 4 +; CHECK-NEXT: [[L:%.*]] = load <8 x i1>, ptr addrspace(5) [[G]], align 1 +; CHECK-NEXT: ret <8 x i1> [[L]] +; + %p = alloca <16 x i1>, align 1, addrspace(5) + %g = getelementptr i8, ptr addrspace(5) %p, i64 4 + %l = load <8 x i1>, ptr addrspace(5) %g, align 1 + ret <8 x i1> %l +} + +define <3 x i1> @load_elem_i1_access_3xi1() { +; CHECK-LABEL: @load_elem_i1_access_3xi1( +; CHECK-NEXT: [[P:%.*]] = alloca <16 x i1>, align 1, addrspace(5) +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr addrspace(5) [[P]], i64 4 +; CHECK-NEXT: [[L:%.*]] = load <3 x i1>, ptr addrspace(5) [[G]], align 1 +; CHECK-NEXT: ret <3 x i1> [[L]] +; + %p = alloca <16 x i1>, align 1, addrspace(5) + %g = getelementptr i8, ptr addrspace(5) %p, i64 4 + %l = load <3 x i1>, ptr addrspace(5) %g, align 1 + ret <3 x i1> %l +} + +define <3 x i1> @load_elem_i8_access_3xi1() { +; CHECK-LABEL: @load_elem_i8_access_3xi1( +; CHECK-NEXT: [[P:%.*]] = alloca <8 x i8>, align 1, addrspace(5) +; CHECK-NEXT: store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) [[P]], align 1 +; CHECK-NEXT: [[G:%.*]] = getelementptr <4 x i8>, ptr addrspace(5) [[P]], i64 1 +; CHECK-NEXT: [[L:%.*]] = load <3 x i1>, ptr addrspace(5) [[G]], align 1 +; CHECK-NEXT: ret <3 x 
i1> [[L]] +; + %p = alloca <8 x i8>, align 1, addrspace(5) + store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) %p, align 1 + %g = getelementptr <4 x i8>, ptr addrspace(5) %p, i64 1 + %l = load <3 x i1>, ptr addrspace(5) %g, align 1 + ret <3 x i1> %l +} + +; This one is actually not problematic. +define <8 x i1> @load_elem_i8_access_8xi1() { +; CHECK-LABEL: @load_elem_i8_access_8xi1( +; CHECK-NEXT: [[P:%.*]] = freeze <8 x i8> poison +; CHECK-NEXT: ret <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false> +; + %p = alloca <8 x i8>, align 1, addrspace(5) + store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) %p, align 1 + %g = getelementptr <4 x i8>, ptr addrspace(5) %p, i64 1 + %l = load <8 x i1>, ptr addrspace(5) %g, align 1 + ret <8 x i1> %l +} + +define <8 x i1> @storeload_elem_i1_access_8xi1() { +; CHECK-LABEL: @storeload_elem_i1_access_8xi1( +; CHECK-NEXT: [[P:%.*]] = alloca <16 x i1>, align 1, addrspace(5) +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr addrspace(5) [[P]], i64 4 +; CHECK-NEXT: store <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, ptr addrspace(5) [[G]], align 1 +; CHECK-NEXT: [[L:%.*]] = load <8 x i1>, ptr addrspace(5) [[G]], align 1 +; CHECK-NEXT: ret <8 x i1> [[L]] +; + %p = alloca <16 x i1>, align 1, addrspace(5) + %g = getelementptr i8, ptr addrspace(5) %p, i64 4 + store <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, ptr addrspace(5) %g, align 1 + %l = load <8 x i1>, ptr addrspace(5) %g, align 1 + ret <8 x i1> %l +} + +define <3 x i1> @storeload_elem_i1_access_3xi1() { +; CHECK-LABEL: @storeload_elem_i1_access_3xi1( +; CHECK-NEXT: [[P:%.*]] = alloca <16 x i1>, align 1, addrspace(5) +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr addrspace(5) [[P]], i64 4 +; CHECK-NEXT: store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) [[G]], align 1 +; CHECK-NEXT: 
[[L:%.*]] = load <3 x i1>, ptr addrspace(5) [[G]], align 1 +; CHECK-NEXT: ret <3 x i1> [[L]] +; + %p = alloca <16 x i1>, align 1, addrspace(5) + %g = getelementptr i8, ptr addrspace(5) %p, i64 4 + store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) %g, align 1 + %l = load <3 x i1>, ptr addrspace(5) %g, align 1 + ret <3 x i1> %l +} + +define <3 x i1> @storeload_elem_i8_access_3xi1() { +; CHECK-LABEL: @storeload_elem_i8_access_3xi1( +; CHECK-NEXT: [[P:%.*]] = alloca <8 x i8>, align 1, addrspace(5) +; CHECK-NEXT: store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) [[P]], align 1 +; CHECK-NEXT: [[G:%.*]] = getelementptr <4 x i8>, ptr addrspace(5) [[P]], i64 1 +; CHECK-NEXT: store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) [[G]], align 1 +; CHECK-NEXT: [[L:%.*]] = load <3 x i1>, ptr addrspace(5) [[G]], align 1 +; CHECK-NEXT: ret <3 x i1> [[L]] +; + %p = alloca <8 x i8>, align 1, addrspace(5) + store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) %p, align 1 + %g = getelementptr <4 x i8>, ptr addrspace(5) %p, i64 1 + store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) %g, align 1 + %l = load <3 x i1>, ptr addrspace(5) %g, align 1 + ret <3 x i1> %l +} + +; This one is actually not problematic. 
+define <8 x i1> @storeload_elem_i8_access_8xi1() { +; CHECK-LABEL: @storeload_elem_i8_access_8xi1( +; CHECK-NEXT: [[P:%.*]] = freeze <8 x i8> poison +; CHECK-NEXT: ret <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false> +; + %p = alloca <8 x i8>, align 1, addrspace(5) + store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) %p, align 1 + %g = getelementptr <4 x i8>, ptr addrspace(5) %p, i64 1 + store <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, ptr addrspace(5) %g, align 1 + %l = load <8 x i1>, ptr addrspace(5) %g, align 1 + ret <8 x i1> %l +} + +define <8 x i1> @array_of_vec_elem_i1_access_8xi1() { +; CHECK-LABEL: @array_of_vec_elem_i1_access_8xi1( +; CHECK-NEXT: [[P:%.*]] = alloca [2 x <16 x i1>], align 1, addrspace(5) +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr addrspace(5) [[P]], i64 4 +; CHECK-NEXT: store <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, ptr addrspace(5) [[G]], align 1 +; CHECK-NEXT: [[L:%.*]] = load <8 x i1>, ptr addrspace(5) [[G]], align 1 +; CHECK-NEXT: ret <8 x i1> [[L]] +; + %p = alloca [2 x <16 x i1>], align 1, addrspace(5) + %g = getelementptr i8, ptr addrspace(5) %p, i64 4 + store <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, ptr addrspace(5) %g, align 1 + %l = load <8 x i1>, ptr addrspace(5) %g, align 1 + ret <8 x i1> %l +} + +define <3 x i1> @array_of_vec_elem_i1_access_3xi1() { +; CHECK-LABEL: @array_of_vec_elem_i1_access_3xi1( +; CHECK-NEXT: [[P:%.*]] = alloca [2 x <16 x i1>], align 1, addrspace(5) +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr addrspace(5) [[P]], i64 4 +; CHECK-NEXT: store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) [[G]], align 1 +; CHECK-NEXT: [[L:%.*]] = load <3 x i1>, ptr addrspace(5) [[G]], align 1 +; CHECK-NEXT: ret <3 x i1> [[L]] +; + %p = alloca [2 x <16 x i1>], align 1, addrspace(5) + %g = 
getelementptr i8, ptr addrspace(5) %p, i64 4 + store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) %g, align 1 + %l = load <3 x i1>, ptr addrspace(5) %g, align 1 + ret <3 x i1> %l +} + +define <3 x i1> @array_of_vec_elem_i8_access_3xi1() { +; CHECK-LABEL: @array_of_vec_elem_i8_access_3xi1( +; CHECK-NEXT: [[P:%.*]] = alloca [2 x <8 x i8>], align 1, addrspace(5) +; CHECK-NEXT: store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) [[P]], align 1 +; CHECK-NEXT: [[G:%.*]] = getelementptr <4 x i8>, ptr addrspace(5) [[P]], i64 1 +; CHECK-NEXT: store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) [[G]], align 1 +; CHECK-NEXT: [[L:%.*]] = load <3 x i1>, ptr addrspace(5) [[G]], align 1 +; CHECK-NEXT: ret <3 x i1> [[L]] +; + %p = alloca [2 x <8 x i8>], align 1, addrspace(5) + store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) %p, align 1 + %g = getelementptr <4 x i8>, ptr addrspace(5) %p, i64 1 + store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) %g, align 1 + %l = load <3 x i1>, ptr addrspace(5) %g, align 1 + ret <3 x i1> %l +} + +; This one is actually not problematic. 
+define <8 x i1> @array_of_vec_elem_i8_access_8xi1() { +; CHECK-LABEL: @array_of_vec_elem_i8_access_8xi1( +; CHECK-NEXT: [[P:%.*]] = freeze <16 x i8> poison +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> [[P]], i8 1, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> [[TMP1]], i8 2, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[TMP2]], i8 3, i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i8> [[TMP3]], i8 4, i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i8> [[TMP4]], i8 5, i32 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i8> [[TMP5]], i8 6, i32 5 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[TMP6]], i8 7, i32 6 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i8> [[TMP7]], i8 8, i32 7 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i8> [[TMP8]], i8 5, i32 4 +; CHECK-NEXT: ret <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false> +; + %p = alloca [2 x <8 x i8>], align 1, addrspace(5) + store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) %p, align 1 + %g = getelementptr <4 x i8>, ptr addrspace(5) %p, i64 1 + store <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, ptr addrspace(5) %g, align 1 + %l = load <8 x i1>, ptr addrspace(5) %g, align 1 + ret <8 x i1> %l +} diff --git a/llvm/test/CodeGen/AMDGPU/swdev380865.ll b/llvm/test/CodeGen/AMDGPU/swdev380865.ll index 9189cef019cf..4a5dc8f300af 100644 --- a/llvm/test/CodeGen/AMDGPU/swdev380865.ll +++ b/llvm/test/CodeGen/AMDGPU/swdev380865.ll @@ -16,15 +16,16 @@ define amdgpu_kernel void @_Z6kernelILi4000ELi1EEvPd(ptr addrspace(1) %x.coerce) ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_mov_b64 s[0:1], 0 ; CHECK-NEXT: s_load_dword s2, s[0:1], 0x0 +; CHECK-NEXT: s_mov_b64 s[0:1], 0x100 ; CHECK-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 ; CHECK-NEXT: s_mov_b32 s4, 0 ; CHECK-NEXT: s_mov_b32 s0, 0 -; CHECK-NEXT: s_mov_b32 s5, 0x40280000 ; CHECK-NEXT: s_waitcnt 
lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s1, s2 ; CHECK-NEXT: s_mov_b32 s2, 0 ; CHECK-NEXT: v_mov_b32_e32 v0, s6 ; CHECK-NEXT: s_mov_b32 s3, 0x40260000 +; CHECK-NEXT: s_mov_b32 s5, 0x40280000 ; CHECK-NEXT: v_mov_b32_e32 v1, s7 ; CHECK-NEXT: .LBB0_1: ; %for.cond4.preheader ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -50,7 +51,7 @@ define amdgpu_kernel void @_Z6kernelILi4000ELi1EEvPd(ptr addrspace(1) %x.coerce) ; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[4:5] ; CHECK-NEXT: s_cbranch_scc1 .LBB0_1 ; CHECK-NEXT: ; %bb.2: ; %for.cond.cleanup.loopexit -; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: v_mov_b32_e32 v2, 0x100 ; CHECK-NEXT: v_mov_b32_e32 v3, 0 ; CHECK-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; CHECK-NEXT: s_endpgm @@ -61,7 +62,7 @@ entry: for.cond4.preheader: ; preds = %for.cond4.preheader, %entry %idx.07 = phi i32 [ %add13, %for.cond4.preheader ], [ 0, %entry ] - %arrayidx.promoted = load double, ptr addrspace(1) null, align 8 + %arrayidx.promoted = load double, ptr addrspace(1) inttoptr (i64 256 to ptr addrspace(1)), align 8 %add9 = fadd contract double %arrayidx.promoted, 0.000000e+00 %add9.1 = fadd contract double %add9, 5.000000e+00 %add9.2 = fadd contract double %add9.1, 6.000000e+00 @@ -70,7 +71,7 @@ for.cond4.preheader: ; preds = %for.cond4.preheader %add9.5 = fadd contract double %add9.4, 1.000000e+01 %add9.6 = fadd contract double %add9.5, 1.100000e+01 %add9.7 = fadd contract double %add9.6, 1.200000e+01 - store double %add9.7, ptr addrspace(1) null, align 8 + store double %add9.7, ptr addrspace(1) inttoptr (i64 256 to ptr addrspace(1)), align 8 %add13 = add i32 %idx.07, %0 %cmp = icmp slt i32 %add13, 2560 br i1 %cmp, label %for.cond4.preheader, label %for.cond.cleanup diff --git a/llvm/test/CodeGen/PowerPC/pr43527.ll b/llvm/test/CodeGen/PowerPC/pr43527.ll index 379bd6c070c7..adfea51077a0 100644 --- a/llvm/test/CodeGen/PowerPC/pr43527.ll +++ b/llvm/test/CodeGen/PowerPC/pr43527.ll @@ -2,7 +2,7 @@ ; RUN: llc -ppc-asm-full-reg-names 
-verify-machineinstrs \ ; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s ; We don't want to produce a CTR loop due to the call to lrint in the body. -define dso_local void @test(i64 %arg, i64 %arg1) { +define dso_local void @test(i64 %arg, i64 %arg1, ptr %arg2) { ; CHECK-LABEL: test: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_5 @@ -12,29 +12,33 @@ define dso_local void @test(i64 %arg, i64 %arg1) { ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r28, -32 ; CHECK-NEXT: .cfi_offset r29, -24 ; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: stdu r1, -64(r1) -; CHECK-NEXT: sub r30, r4, r3 -; CHECK-NEXT: li r29, -4 +; CHECK-NEXT: mr r30, r5 +; CHECK-NEXT: sub r29, r4, r3 +; CHECK-NEXT: addi r28, r5, -4 ; CHECK-NEXT: std r0, 80(r1) ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_3: # %bb5 ; CHECK-NEXT: # -; CHECK-NEXT: lfsu f1, 4(r29) +; CHECK-NEXT: lfsu f1, 4(r28) ; CHECK-NEXT: bl lrint ; CHECK-NEXT: nop -; CHECK-NEXT: addi r30, r30, -1 -; CHECK-NEXT: cmpldi r30, 0 +; CHECK-NEXT: addi r29, r29, -1 +; CHECK-NEXT: stb r3, 0(r30) +; CHECK-NEXT: cmpldi r29, 0 ; CHECK-NEXT: bc 12, gt, .LBB0_3 ; CHECK-NEXT: # %bb.4: # %bb15 -; CHECK-NEXT: stb r3, 0(r3) ; CHECK-NEXT: addi r1, r1, 64 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB0_5: # %bb2 @@ -54,12 +58,12 @@ bb4: ; preds = %bb3 bb5: ; preds = %bb5, %bb4 %tmp6 = phi i64 [ %tmp12, %bb5 ], [ 0, %bb4 ] - %tmp7 = getelementptr inbounds float, ptr null, i64 %tmp6 + %tmp7 = getelementptr inbounds float, ptr %arg2, i64 %tmp6 %tmp8 = load float, ptr %tmp7, align 4 %tmp9 = 
fpext float %tmp8 to double %tmp10 = tail call i64 @llvm.lrint.i64.f64(double %tmp9) #2 %tmp11 = trunc i64 %tmp10 to i8 - store i8 %tmp11, ptr undef, align 1 + store i8 %tmp11, ptr %arg2, align 1 %tmp12 = add nuw i64 %tmp6, 1 %tmp13 = icmp eq i64 %tmp12, %tmp br i1 %tmp13, label %bb15, label %bb5 diff --git a/llvm/test/CodeGen/PowerPC/pr48519.ll b/llvm/test/CodeGen/PowerPC/pr48519.ll index fa156454a131..b610f12159ee 100644 --- a/llvm/test/CodeGen/PowerPC/pr48519.ll +++ b/llvm/test/CodeGen/PowerPC/pr48519.ll @@ -32,7 +32,7 @@ define void @julia__typed_vcat_20() #0 { ; CHECK-NEXT: # %bb.2: # %bb11 ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop -; CHECK-NEXT: sth r3, 0(r3) +; CHECK-NEXT: sth r3, 128(0) ; ; CHECK-P9-LABEL: julia__typed_vcat_20: ; CHECK-P9: # %bb.0: # %bb @@ -54,6 +54,7 @@ define void @julia__typed_vcat_20() #0 { ; CHECK-P9-NEXT: bdnz .LBB0_1 ; CHECK-P9-NEXT: # %bb.2: # %bb11 ; CHECK-P9-NEXT: xscvdphp f0, f0 +; CHECK-P9-NEXT: li r3, 128 ; CHECK-P9-NEXT: stxsihx f0, 0, r3 bb: %i = load i64, ptr addrspace(11) null, align 8 @@ -67,7 +68,7 @@ bb3: ; preds = %bb3, %bb %i6 = add nsw i64 %i5, -1 %i7 = add i64 %i6, 0 %i8 = sitofp i64 %i7 to half - store half %i8, ptr addrspace(13) undef, align 2 + store half %i8, ptr addrspace(13) inttoptr (i64 128 to ptr addrspace(13)), align 2 %i9 = icmp eq i64 %i4, 0 %i10 = add i64 %i4, 1 br i1 %i9, label %bb11, label %bb3 diff --git a/llvm/test/CodeGen/PowerPC/sms-grp-order.ll b/llvm/test/CodeGen/PowerPC/sms-grp-order.ll index f72598cb4cbc..eaea47608eb2 100644 --- a/llvm/test/CodeGen/PowerPC/sms-grp-order.ll +++ b/llvm/test/CodeGen/PowerPC/sms-grp-order.ll @@ -2,32 +2,32 @@ ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs\ ; RUN: -mcpu=pwr9 --ppc-enable-pipeliner | FileCheck %s -define void @lame_encode_buffer_interleaved() local_unnamed_addr { +define void @lame_encode_buffer_interleaved(ptr %arg0) local_unnamed_addr { ; CHECK-LABEL: lame_encode_buffer_interleaved: ; CHECK: # %bb.0: -; 
CHECK-NEXT: lha 3, 0(3) -; CHECK-NEXT: li 5, 1 -; CHECK-NEXT: lhz 4, 0(0) -; CHECK-NEXT: rldic 5, 5, 62, 1 -; CHECK-NEXT: mtctr 5 -; CHECK-NEXT: srawi 3, 3, 1 -; CHECK-NEXT: addze 3, 3 +; CHECK-NEXT: li 4, 1 +; CHECK-NEXT: rldic 4, 4, 62, 1 +; CHECK-NEXT: mtctr 4 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: -; CHECK-NEXT: extsh 4, 4 +; CHECK-NEXT: lha 4, 0(3) +; CHECK-NEXT: lha 5, 0(3) ; CHECK-NEXT: srawi 4, 4, 1 ; CHECK-NEXT: addze 4, 4 +; CHECK-NEXT: srawi 5, 5, 1 +; CHECK-NEXT: addze 5, 5 +; CHECK-NEXT: sth 4, 0(3) +; CHECK-NEXT: sth 5, 0(3) ; CHECK-NEXT: bdnz .LBB0_1 ; CHECK-NEXT: # %bb.2: -; CHECK-NEXT: sth 4, 0(0) -; CHECK-NEXT: sth 3, 0(3) ; CHECK-NEXT: blr + %undef = freeze ptr poison br label %1 1: ; preds = %1, %0 %2 = phi i64 [ 0, %0 ], [ %13, %1 ] - %3 = load i16, ptr null, align 2 - %4 = load i16, ptr undef, align 2 + %3 = load i16, ptr %arg0, align 2 + %4 = load i16, ptr %undef, align 2 %5 = sext i16 %3 to i32 %6 = sext i16 %4 to i32 %7 = add nsw i32 0, %5 @@ -36,8 +36,8 @@ define void @lame_encode_buffer_interleaved() local_unnamed_addr { %10 = sdiv i32 %8, 2 %11 = trunc i32 %9 to i16 %12 = trunc i32 %10 to i16 - store i16 %11, ptr null, align 2 - store i16 %12, ptr undef, align 2 + store i16 %11, ptr %arg0, align 2 + store i16 %12, ptr %undef, align 2 %13 = add i64 %2, 4 %14 = icmp eq i64 %13, 0 br i1 %14, label %15, label %1 diff --git a/llvm/test/CodeGen/RISCV/emit-x8-as-fp.ll b/llvm/test/CodeGen/RISCV/emit-x8-as-fp.ll new file mode 100644 index 000000000000..0c9ef2176fd7 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/emit-x8-as-fp.ll @@ -0,0 +1,121 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I-DEFAULT %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I-DEFAULT %s +; RUN: llc -mtriple=riscv32 -M emit-x8-as-fp -verify-machineinstrs < %s \ +; RUN: | FileCheck 
-check-prefix=RV32I-EMIT-FP %s +; RUN: llc -mtriple=riscv64 -M emit-x8-as-fp -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I-EMIT-FP %s +; RUN: llc -mtriple=riscv32 -M numeric -M emit-x8-as-fp -verify-machineinstrs \ +; RUN: < %s | FileCheck -check-prefix=RV32I-NUMERIC %s +; RUN: llc -mtriple=riscv64 -M numeric -M emit-x8-as-fp -verify-machineinstrs \ +; RUN: < %s | FileCheck -check-prefix=RV64I-NUMERIC %s + +define signext i32 @add(i32 %0, i32 %1) #0 { +; RV32I-DEFAULT-LABEL: add: +; RV32I-DEFAULT: # %bb.0: +; RV32I-DEFAULT-NEXT: addi sp, sp, -16 +; RV32I-DEFAULT-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-DEFAULT-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-DEFAULT-NEXT: addi s0, sp, 16 +; RV32I-DEFAULT-NEXT: sw a0, -12(s0) +; RV32I-DEFAULT-NEXT: sw a1, -16(s0) +; RV32I-DEFAULT-NEXT: lw a0, -12(s0) +; RV32I-DEFAULT-NEXT: lw a1, -16(s0) +; RV32I-DEFAULT-NEXT: add a0, a0, a1 +; RV32I-DEFAULT-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-DEFAULT-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-DEFAULT-NEXT: addi sp, sp, 16 +; RV32I-DEFAULT-NEXT: ret +; +; RV64I-DEFAULT-LABEL: add: +; RV64I-DEFAULT: # %bb.0: +; RV64I-DEFAULT-NEXT: addi sp, sp, -32 +; RV64I-DEFAULT-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-DEFAULT-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-DEFAULT-NEXT: addi s0, sp, 32 +; RV64I-DEFAULT-NEXT: sw a0, -20(s0) +; RV64I-DEFAULT-NEXT: sw a1, -24(s0) +; RV64I-DEFAULT-NEXT: lw a0, -20(s0) +; RV64I-DEFAULT-NEXT: lw a1, -24(s0) +; RV64I-DEFAULT-NEXT: addw a0, a0, a1 +; RV64I-DEFAULT-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-DEFAULT-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-DEFAULT-NEXT: addi sp, sp, 32 +; RV64I-DEFAULT-NEXT: ret +; +; RV32I-EMIT-FP-LABEL: add: +; RV32I-EMIT-FP: # %bb.0: +; RV32I-EMIT-FP-NEXT: addi sp, sp, -16 +; RV32I-EMIT-FP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-EMIT-FP-NEXT: sw fp, 8(sp) # 4-byte Folded Spill +; RV32I-EMIT-FP-NEXT: addi fp, sp, 16 +; 
RV32I-EMIT-FP-NEXT: sw a0, -12(fp) +; RV32I-EMIT-FP-NEXT: sw a1, -16(fp) +; RV32I-EMIT-FP-NEXT: lw a0, -12(fp) +; RV32I-EMIT-FP-NEXT: lw a1, -16(fp) +; RV32I-EMIT-FP-NEXT: add a0, a0, a1 +; RV32I-EMIT-FP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-EMIT-FP-NEXT: lw fp, 8(sp) # 4-byte Folded Reload +; RV32I-EMIT-FP-NEXT: addi sp, sp, 16 +; RV32I-EMIT-FP-NEXT: ret +; +; RV64I-EMIT-FP-LABEL: add: +; RV64I-EMIT-FP: # %bb.0: +; RV64I-EMIT-FP-NEXT: addi sp, sp, -32 +; RV64I-EMIT-FP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-EMIT-FP-NEXT: sd fp, 16(sp) # 8-byte Folded Spill +; RV64I-EMIT-FP-NEXT: addi fp, sp, 32 +; RV64I-EMIT-FP-NEXT: sw a0, -20(fp) +; RV64I-EMIT-FP-NEXT: sw a1, -24(fp) +; RV64I-EMIT-FP-NEXT: lw a0, -20(fp) +; RV64I-EMIT-FP-NEXT: lw a1, -24(fp) +; RV64I-EMIT-FP-NEXT: addw a0, a0, a1 +; RV64I-EMIT-FP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-EMIT-FP-NEXT: ld fp, 16(sp) # 8-byte Folded Reload +; RV64I-EMIT-FP-NEXT: addi sp, sp, 32 +; RV64I-EMIT-FP-NEXT: ret +; +; RV32I-NUMERIC-LABEL: add: +; RV32I-NUMERIC: # %bb.0: +; RV32I-NUMERIC-NEXT: addi x2, x2, -16 +; RV32I-NUMERIC-NEXT: sw x1, 12(x2) # 4-byte Folded Spill +; RV32I-NUMERIC-NEXT: sw x8, 8(x2) # 4-byte Folded Spill +; RV32I-NUMERIC-NEXT: addi x8, x2, 16 +; RV32I-NUMERIC-NEXT: sw x10, -12(x8) +; RV32I-NUMERIC-NEXT: sw x11, -16(x8) +; RV32I-NUMERIC-NEXT: lw x10, -12(x8) +; RV32I-NUMERIC-NEXT: lw x11, -16(x8) +; RV32I-NUMERIC-NEXT: add x10, x10, x11 +; RV32I-NUMERIC-NEXT: lw x1, 12(x2) # 4-byte Folded Reload +; RV32I-NUMERIC-NEXT: lw x8, 8(x2) # 4-byte Folded Reload +; RV32I-NUMERIC-NEXT: addi x2, x2, 16 +; RV32I-NUMERIC-NEXT: ret +; +; RV64I-NUMERIC-LABEL: add: +; RV64I-NUMERIC: # %bb.0: +; RV64I-NUMERIC-NEXT: addi x2, x2, -32 +; RV64I-NUMERIC-NEXT: sd x1, 24(x2) # 8-byte Folded Spill +; RV64I-NUMERIC-NEXT: sd x8, 16(x2) # 8-byte Folded Spill +; RV64I-NUMERIC-NEXT: addi x8, x2, 32 +; RV64I-NUMERIC-NEXT: sw x10, -20(x8) +; RV64I-NUMERIC-NEXT: sw x11, -24(x8) +; RV64I-NUMERIC-NEXT: 
lw x10, -20(x8) +; RV64I-NUMERIC-NEXT: lw x11, -24(x8) +; RV64I-NUMERIC-NEXT: addw x10, x10, x11 +; RV64I-NUMERIC-NEXT: ld x1, 24(x2) # 8-byte Folded Reload +; RV64I-NUMERIC-NEXT: ld x8, 16(x2) # 8-byte Folded Reload +; RV64I-NUMERIC-NEXT: addi x2, x2, 32 +; RV64I-NUMERIC-NEXT: ret + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + store i32 %0, ptr %3, align 4 + store i32 %1, ptr %4, align 4 + %5 = load i32, ptr %3, align 4 + %6 = load i32, ptr %4, align 4 + %7 = add nsw i32 %5, %6 + ret i32 %7 +} + +attributes #0 = { noinline nounwind optnone "frame-pointer"="all" } diff --git a/llvm/test/CodeGen/X86/fake-use-remove-loads.mir b/llvm/test/CodeGen/X86/fake-use-remove-loads.mir index 3f67f03c9a63..aa9839d2700a 100644 --- a/llvm/test/CodeGen/X86/fake-use-remove-loads.mir +++ b/llvm/test/CodeGen/X86/fake-use-remove-loads.mir @@ -3,6 +3,8 @@ # remove-loads-into-fake-uses pass, and that if the function does not use # instruction referencing then no changes are made. # RUN: llc %s -run-pass remove-loads-into-fake-uses -mtriple=x86_64-unknown-linux -debug-only=remove-loads-into-fake-uses 2>&1 -o - | FileCheck %s +# RUN: llc %s -passes remove-loads-into-fake-uses -mtriple=x86_64-unknown-linux -debug-only=remove-loads-into-fake-uses 2>&1 -o - | FileCheck %s + # REQUIRES: asserts # ## We verify that: diff --git a/llvm/test/CodeGen/X86/pr134602.ll b/llvm/test/CodeGen/X86/pr134602.ll new file mode 100644 index 000000000000..e4376cbeab10 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr134602.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefix=X64 + +; FIXME: incorrect vector codegen due to bad handling of splats of binops containing undefs +define i32 @PR134602(i16 %a0) { +; X86-LABEL: PR134602: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl $1, %eax +; X86-NEXT: addl $3, 
%eax +; X86-NEXT: cwtl +; X86-NEXT: retl +; +; X64-LABEL: PR134602: +; X64: # %bb.0: +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: retq + %splat= insertelement <4 x i16> zeroinitializer, i16 %a0, i64 0 + %mul = mul <4 x i16> %splat, <i16 1, i16 1, i16 0, i16 0> + %or = or <4 x i16> splat (i16 1), %mul + %reduce = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %or) + %ret_32 = sext i16 %reduce to i32 + ret i32 %ret_32 +} diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s index b289a4c9aa65..c6782a6c9643 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --sort --version 5 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s @@ -56,14 +56,23 @@ v_add_f16 v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] v_add_f16 v5.l, v255.l, v2.l quad_perm:[3,2,1,0] // GFX11: v_add_f16_e64_dpp v5.l, v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_fmac_f16 v255, v1, v2 -// GFX11: v_fmac_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x36,0xd5,0x01,0x05,0x02,0x00] +v_fmac_f16 v255.h, v1.h, v2.h +// GFX11: v_fmac_f16_e64 v255.h, v1.h, v2.h op_sel:[1,1,1,1] ; encoding: [0xff,0x58,0x36,0xd5,0x01,0x05,0x02,0x00] -v_fmac_f16 v5, v1, v255 -// GFX11: v_fmac_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0xff,0x03,0x00] +v_fmac_f16 v255.l, v1.l, v2.l +// GFX11: v_fmac_f16_e64 v255.l, 
v1.l, v2.l ; encoding: [0xff,0x00,0x36,0xd5,0x01,0x05,0x02,0x00] -v_fmac_f16 v5, v255, v2 -// GFX11: v_fmac_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x36,0xd5,0xff,0x05,0x02,0x00] +v_fmac_f16 v5.h, v1.h, v255.h +// GFX11: v_fmac_f16_e64 v5.h, v1.h, v255.h op_sel:[1,1,1,1] ; encoding: [0x05,0x58,0x36,0xd5,0x01,0xff,0x03,0x00] + +v_fmac_f16 v5.h, v255.h, v2.h +// GFX11: v_fmac_f16_e64 v5.h, v255.h, v2.h op_sel:[1,1,1,1] ; encoding: [0x05,0x58,0x36,0xd5,0xff,0x05,0x02,0x00] + +v_fmac_f16 v5.l, v1.l, v255.l +// GFX11: v_fmac_f16_e64 v5.l, v1.l, v255.l ; encoding: [0x05,0x00,0x36,0xd5,0x01,0xff,0x03,0x00] + +v_fmac_f16 v5.l, v255.l, v2.l +// GFX11: v_fmac_f16_e64 v5.l, v255.l, v2.l ; encoding: [0x05,0x00,0x36,0xd5,0xff,0x05,0x02,0x00] v_ldexp_f16 v255.h, v1.h, v2.h // GFX11: v_ldexp_f16_e64 v255.h, v1.h, v2.h op_sel:[1,1,1] ; encoding: [0xff,0x58,0x3b,0xd5,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s index 15d6547a0477..cb05bc403778 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s @@ -524,47 +524,59 @@ v_cvt_pkrtz_f16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 v_cvt_pkrtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x2f,0xd5,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] -v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_fmac_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_fmac_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_fmac_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_fmac_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX11: v_fmac_f16_e64_dpp 
v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_fmac_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_fmac_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_fmac_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_fmac_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_fmac_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l 
row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +v_fmac_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_fmac_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] +v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] -v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30] +v_fmac_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_fmac_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30] + +v_fmac_f16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_fmac_f16_e64_dpp v5.h, v1.h, v2.h 
op_sel:[1,1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_fmac_f16_e64_dpp v5.l, |v1.h|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_fmac_f16_e64_dpp v5.l, |v1.h|, -v2.l op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.h| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.h| op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x12,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_fmac_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_fmac_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| op_sel:[0,0,1,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc3,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30] v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s index 2f52b7f467e0..35bbca0b6d0b 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s @@ -169,17 +169,29 @@ v_cvt_pkrtz_f16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_pkrtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x2f,0xd5,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_fmac_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +v_fmac_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fmac_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x36,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x36,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x36,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +v_fmac_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_fmac_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x36,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_fmac_f16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fmac_f16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_fmac_f16_e64_dpp v5.l, |v1.h|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_fmac_f16_e64_dpp v5.l, |v1.h|, -v2.l 
op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.h| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.h| op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x12,0x36,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_fmac_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_fmac_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| op_sel:[0,0,1,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc3,0x36,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] v_fmac_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_fmac_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s index 5debd064812c..4f410f611c8e 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s @@ -587,50 +587,59 @@ v_fmac_dx9_zero_f32_e64 v5, -src_scc, |vcc_lo| mul:4 v_fmac_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 // GFX11: v_fmac_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x06,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] -v_fmac_f16_e64 v5, v1, v2 -// GFX11: v_fmac_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00] +v_fmac_f16_e64 v5.l, v1.l, v2.l +// GFX11: v_fmac_f16_e64 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00] -v_fmac_f16_e64 v5, v255, v255 -// GFX11: v_fmac_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00] +v_fmac_f16_e64 v5.l, v255.l, v255.l +// GFX11: v_fmac_f16_e64 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00] -v_fmac_f16_e64 v5, s1, s2 -// GFX11: v_fmac_f16_e64 v5, s1, s2 ; encoding: 
[0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00] +v_fmac_f16_e64 v5.l, s1, s2 +// GFX11: v_fmac_f16_e64 v5.l, s1, s2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00] -v_fmac_f16_e64 v5, s105, s105 -// GFX11: v_fmac_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00] +v_fmac_f16_e64 v5.l, s105, s105 +// GFX11: v_fmac_f16_e64 v5.l, s105, s105 ; encoding: [0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00] -v_fmac_f16_e64 v5, vcc_lo, ttmp15 -// GFX11: v_fmac_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00] +v_fmac_f16_e64 v5.l, vcc_lo, ttmp15 +// GFX11: v_fmac_f16_e64 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00] -v_fmac_f16_e64 v5, vcc_hi, 0xfe0b -// GFX11: v_fmac_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +v_fmac_f16_e64 v5.l, vcc_hi, 0xfe0b +// GFX11: v_fmac_f16_e64 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_fmac_f16_e64 v5, ttmp15, src_scc -// GFX11: v_fmac_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00] +v_fmac_f16_e64 v5.l, ttmp15, src_scc +// GFX11: v_fmac_f16_e64 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00] -v_fmac_f16_e64 v5, m0, 0.5 -// GFX11: v_fmac_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00] +v_fmac_f16_e64 v5.l, m0, 0.5 +// GFX11: v_fmac_f16_e64 v5.l, m0, 0.5 ; encoding: [0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00] -v_fmac_f16_e64 v5, exec_lo, -1 -// GFX11: v_fmac_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00] +v_fmac_f16_e64 v5.l, exec_lo, -1 +// GFX11: v_fmac_f16_e64 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00] -v_fmac_f16_e64 v5, |exec_hi|, null -// GFX11: v_fmac_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00] +v_fmac_f16_e64 v5.l, |exec_hi|, null +// GFX11: v_fmac_f16_e64 v5.l, 
|exec_hi|, null ; encoding: [0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00] -v_fmac_f16_e64 v5, null, exec_lo -// GFX11: v_fmac_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00] +v_fmac_f16_e64 v5.l, null, exec_lo +// GFX11: v_fmac_f16_e64 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00] -v_fmac_f16_e64 v5, -1, exec_hi -// GFX11: v_fmac_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00] +v_fmac_f16_e64 v5.l, -1, exec_hi +// GFX11: v_fmac_f16_e64 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00] -v_fmac_f16_e64 v5, 0.5, -m0 mul:2 -// GFX11: v_fmac_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48] +v_fmac_f16_e64 v5.l, 0.5, -m0 mul:2 +// GFX11: v_fmac_f16_e64 v5.l, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48] -v_fmac_f16_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: v_fmac_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30] +v_fmac_f16_e64 v5.l, -src_scc, |vcc_lo| mul:4 +// GFX11: v_fmac_f16_e64 v5.l, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30] -v_fmac_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 -// GFX11: v_fmac_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] +v_fmac_f16_e64 v255.l, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX11: v_fmac_f16_e64 v255.l, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] + +v_fmac_f16_e64 v5.l, v1.h, v2.l +// GFX11: v_fmac_f16_e64 v5.l, v1.h, v2.l op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x36,0xd5,0x01,0x05,0x02,0x00] + +v_fmac_f16_e64 v5.l, v255.l, v255.h +// GFX11: v_fmac_f16_e64 v5.l, v255.l, v255.h op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x36,0xd5,0xff,0xff,0x03,0x00] + +v_fmac_f16_e64 v255.h, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX11: v_fmac_f16_e64 v255.h, -|0xfe0b|, -|vcc_hi| 
op_sel:[0,0,1,1] clamp div:2 ; encoding: [0xff,0xc3,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] v_fmac_f32_e64 v5, v1, v2 // GFX11: v_fmac_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x2b,0xd5,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_promote.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_promote.s index e1286dadfb6e..296c42f4c2c6 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_promote.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_promote.s @@ -55,14 +55,23 @@ v_add_f16 v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] v_add_f16 v5.l, v255.l, v2.l quad_perm:[3,2,1,0] // GFX12: v_add_f16_e64_dpp v5.l, v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_fmac_f16 v255, v1, v2 -// GFX12: v_fmac_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x36,0xd5,0x01,0x05,0x02,0x00] +v_fmac_f16 v255.h, v1.h, v2.h +// GFX12: v_fmac_f16_e64 v255.h, v1.h, v2.h op_sel:[1,1,1,1] ; encoding: [0xff,0x58,0x36,0xd5,0x01,0x05,0x02,0x00] -v_fmac_f16 v5, v1, v255 -// GFX12: v_fmac_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0xff,0x03,0x00] +v_fmac_f16 v255.l, v1.l, v2.l +// GFX12: v_fmac_f16_e64 v255.l, v1.l, v2.l ; encoding: [0xff,0x00,0x36,0xd5,0x01,0x05,0x02,0x00] -v_fmac_f16 v5, v255, v2 -// GFX12: v_fmac_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x36,0xd5,0xff,0x05,0x02,0x00] +v_fmac_f16 v5.h, v1.h, v255.h +// GFX12: v_fmac_f16_e64 v5.h, v1.h, v255.h op_sel:[1,1,1,1] ; encoding: [0x05,0x58,0x36,0xd5,0x01,0xff,0x03,0x00] + +v_fmac_f16 v5.h, v255.h, v2.h +// GFX12: v_fmac_f16_e64 v5.h, v255.h, v2.h op_sel:[1,1,1,1] ; encoding: [0x05,0x58,0x36,0xd5,0xff,0x05,0x02,0x00] + +v_fmac_f16 v5.l, v1.l, v255.l +// GFX12: v_fmac_f16_e64 v5.l, v1.l, v255.l ; encoding: [0x05,0x00,0x36,0xd5,0x01,0xff,0x03,0x00] + +v_fmac_f16 v5.l, v255.l, v2.l +// GFX12: v_fmac_f16_e64 v5.l, v255.l, v2.l ; encoding: [0x05,0x00,0x36,0xd5,0xff,0x05,0x02,0x00] v_ldexp_f16 v255.h, v1.h, v2.h // 
GFX12: v_ldexp_f16_e64 v255.h, v1.h, v2.h op_sel:[1,1,1] ; encoding: [0xff,0x58,0x3b,0xd5,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2.s index 7674eafd9c30..f16838919a4a 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2.s @@ -578,50 +578,59 @@ v_cvt_pkrtz_f16_f32_e64 v5, -src_scc, |vcc_lo| v_cvt_pkrtz_f16_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp // GFX12: v_cvt_pk_rtz_f16_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0xff,0x83,0x2f,0xd5,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -v_fmac_f16_e64 v5, v1, v2 -// GFX12: v_fmac_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00] +v_fmac_f16_e64 v5.l, v1.l, v2.l +// GFX12: v_fmac_f16_e64 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00] -v_fmac_f16_e64 v5, v255, v255 -// GFX12: v_fmac_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00] +v_fmac_f16_e64 v5.l, v255.l, v255.l +// GFX12: v_fmac_f16_e64 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00] -v_fmac_f16_e64 v5, s1, s2 -// GFX12: v_fmac_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00] +v_fmac_f16_e64 v5.l, s1, s2 +// GFX12: v_fmac_f16_e64 v5.l, s1, s2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00] -v_fmac_f16_e64 v5, s105, s105 -// GFX12: v_fmac_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00] +v_fmac_f16_e64 v5.l, s105, s105 +// GFX12: v_fmac_f16_e64 v5.l, s105, s105 ; encoding: [0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00] -v_fmac_f16_e64 v5, vcc_lo, ttmp15 -// GFX12: v_fmac_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00] +v_fmac_f16_e64 v5.l, vcc_lo, ttmp15 +// GFX12: v_fmac_f16_e64 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00] -v_fmac_f16_e64 v5, vcc_hi, 0xfe0b -// GFX12: v_fmac_f16_e64 v5, vcc_hi, 
0xfe0b ; encoding: [0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +v_fmac_f16_e64 v5.l, vcc_hi, 0xfe0b +// GFX12: v_fmac_f16_e64 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_fmac_f16_e64 v5, ttmp15, src_scc -// GFX12: v_fmac_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00] +v_fmac_f16_e64 v5.l, ttmp15, src_scc +// GFX12: v_fmac_f16_e64 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00] -v_fmac_f16_e64 v5, m0, 0.5 -// GFX12: v_fmac_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00] +v_fmac_f16_e64 v5.l, m0, 0.5 +// GFX12: v_fmac_f16_e64 v5.l, m0, 0.5 ; encoding: [0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00] -v_fmac_f16_e64 v5, exec_lo, -1 -// GFX12: v_fmac_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00] +v_fmac_f16_e64 v5.l, exec_lo, -1 +// GFX12: v_fmac_f16_e64 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00] -v_fmac_f16_e64 v5, |exec_hi|, null -// GFX12: v_fmac_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00] +v_fmac_f16_e64 v5.l, |exec_hi|, null +// GFX12: v_fmac_f16_e64 v5.l, |exec_hi|, null ; encoding: [0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00] -v_fmac_f16_e64 v5, null, exec_lo -// GFX12: v_fmac_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00] +v_fmac_f16_e64 v5.l, null, exec_lo +// GFX12: v_fmac_f16_e64 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00] -v_fmac_f16_e64 v5, -1, exec_hi -// GFX12: v_fmac_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00] +v_fmac_f16_e64 v5.l, -1, exec_hi +// GFX12: v_fmac_f16_e64 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00] -v_fmac_f16_e64 v5, 0.5, -m0 mul:2 -// GFX12: v_fmac_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48] +v_fmac_f16_e64 v5.l, 0.5, -m0 mul:2 +// 
GFX12: v_fmac_f16_e64 v5.l, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48] -v_fmac_f16_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX12: v_fmac_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30] +v_fmac_f16_e64 v5.l, -src_scc, |vcc_lo| mul:4 +// GFX12: v_fmac_f16_e64 v5.l, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30] -v_fmac_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 -// GFX12: v_fmac_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] +v_fmac_f16_e64 v255.l, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX12: v_fmac_f16_e64 v255.l, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] + +v_fmac_f16_e64 v5.l, v1.h, v2.l +// GFX12: v_fmac_f16_e64 v5.l, v1.h, v2.l op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x36,0xd5,0x01,0x05,0x02,0x00] + +v_fmac_f16_e64 v5.l, v255.l, v255.h +// GFX12: v_fmac_f16_e64 v5.l, v255.l, v255.h op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x36,0xd5,0xff,0xff,0x03,0x00] + +v_fmac_f16_e64 v255.h, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX12: v_fmac_f16_e64 v255.h, -|0xfe0b|, -|vcc_hi| op_sel:[0,0,1,1] clamp div:2 ; encoding: [0xff,0xc3,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] v_fmac_f32_e64 v5, v1, v2 // GFX12: v_fmac_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x2b,0xd5,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s index e27706ec02ea..301f756e906a 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s @@ -566,6 +566,48 @@ v_cvt_pkrtz_f16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 v_cvt_pkrtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| 
clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x2f,0xd5,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_fmac_f16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_fmac_f16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_fmac_f16_e64_dpp v5.l, |v1.h|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_fmac_f16_e64_dpp v5.l, |v1.h|, -v2.l op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.h| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.h| op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x12,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_fmac_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_fmac_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| op_sel:[0,0,1,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc3,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30] + v_ldexp_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] // GFX12: v_ldexp_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s index 49233697e955..7390720e4dd5 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s @@ -239,6 +239,21 @@ v_cvt_pkrtz_f16_f32_e64_dpp v5, -v1, |v2| 
dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_pkrtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x2f,0xd5,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +v_fmac_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_fmac_f16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fmac_f16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_fmac_f16_e64_dpp v5.l, |v1.h|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_fmac_f16_e64_dpp v5.l, |v1.h|, -v2.l op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.h| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.h| op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x12,0x36,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_fmac_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_fmac_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| op_sel:[0,0,1,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc3,0x36,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + v_ldexp_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_ldexp_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/RISCV/emit-x8-as-fp.txt b/llvm/test/MC/Disassembler/RISCV/emit-x8-as-fp.txt new file mode 100644 index 000000000000..23d3f72c2ef5 --- /dev/null +++ b/llvm/test/MC/Disassembler/RISCV/emit-x8-as-fp.txt @@ -0,0 +1,22 @@ +# RUN: llvm-mc --disassemble -triple=riscv32 --show-encoding < 
%s \ +# RUN: | FileCheck --check-prefixes=DEFAULT %s +# RUN: llvm-mc --disassemble -triple=riscv64 --show-encoding < %s \ +# RUN: | FileCheck --check-prefixes=DEFAULT %s +# RUN: llvm-mc --disassemble -triple=riscv32 -M emit-x8-as-fp \ +# RUN: --show-encoding < %s | FileCheck --check-prefixes=EMIT-FP %s +# RUN: llvm-mc --disassemble -triple=riscv64 -M emit-x8-as-fp \ +# RUN: --show-encoding < %s | FileCheck --check-prefixes=EMIT-FP %s +# RUN: llvm-mc --disassemble -triple=riscv32 -M numeric -M emit-x8-as-fp \ +# RUN: --show-encoding < %s | FileCheck --check-prefixes=NUMERIC %s +# RUN: llvm-mc --disassemble -triple=riscv64 -M numeric -M emit-x8-as-fp \ +# RUN: --show-encoding < %s | FileCheck --check-prefixes=NUMERIC %s + +# DEFAULT: sw a0, -12(s0) # encoding: [0x23,0x2a,0xa4,0xfe] +# EMIT-FP: sw a0, -12(fp) # encoding: [0x23,0x2a,0xa4,0xfe] +# NUMERIC: sw x10, -12(x8) # encoding: [0x23,0x2a,0xa4,0xfe] +0x23 0x2a 0xa4 0xfe + +# DEFAULT: lw a0, -12(s0) # encoding: [0x03,0x25,0x44,0xff] +# EMIT-FP: lw a0, -12(fp) # encoding: [0x03,0x25,0x44,0xff] +# NUMERIC: lw x10, -12(x8) # encoding: [0x03,0x25,0x44,0xff] +0x03 0x25 0x44 0xff diff --git a/llvm/test/MC/RISCV/emit-x8-as-fp.s b/llvm/test/MC/RISCV/emit-x8-as-fp.s new file mode 100644 index 000000000000..ea5143f5066f --- /dev/null +++ b/llvm/test/MC/RISCV/emit-x8-as-fp.s @@ -0,0 +1,42 @@ +# RUN: llvm-mc --triple=riscv32 --show-encoding < %s 2>&1 \ +# RUN: | FileCheck --check-prefix=DEFAULT %s +# RUN: llvm-mc --triple=riscv64 --show-encoding < %s 2>&1 \ +# RUN: | FileCheck --check-prefix=DEFAULT %s +# RUN: llvm-mc --triple=riscv32 -M emit-x8-as-fp --show-encoding < %s 2>&1 \ +# RUN: | FileCheck --check-prefix=EMIT-FP %s +# RUN: llvm-mc --triple=riscv64 -M emit-x8-as-fp --show-encoding < %s 2>&1 \ +# RUN: | FileCheck --check-prefix=EMIT-FP %s +# RUN: llvm-mc --triple=riscv32 -M numeric -M emit-x8-as-fp --show-encoding \ +# RUN: < %s 2>&1 | FileCheck --check-prefix=NUMERIC %s +# RUN: llvm-mc --triple=riscv64 -M numeric -M 
emit-x8-as-fp --show-encoding \ +# RUN: < %s 2>&1 | FileCheck --check-prefix=NUMERIC %s + +# DEFAULT: sw a0, -12(s0) # encoding: [0x23,0x2a,0xa4,0xfe] +# EMIT-FP: sw a0, -12(fp) # encoding: [0x23,0x2a,0xa4,0xfe] +# NUMERIC: sw x10, -12(x8) # encoding: [0x23,0x2a,0xa4,0xfe] +sw a0, -12(s0) + +# DEFAULT: lw a0, -12(s0) # encoding: [0x03,0x25,0x44,0xff] +# EMIT-FP: lw a0, -12(fp) # encoding: [0x03,0x25,0x44,0xff] +# NUMERIC: lw x10, -12(x8) # encoding: [0x03,0x25,0x44,0xff] +lw a0, -12(s0) + +# DEFAULT: sw a0, -12(s0) # encoding: [0x23,0x2a,0xa4,0xfe] +# EMIT-FP: sw a0, -12(fp) # encoding: [0x23,0x2a,0xa4,0xfe] +# NUMERIC: sw x10, -12(x8) # encoding: [0x23,0x2a,0xa4,0xfe] +sw a0, -12(fp) + +# DEFAULT: lw a0, -12(s0) # encoding: [0x03,0x25,0x44,0xff] +# EMIT-FP: lw a0, -12(fp) # encoding: [0x03,0x25,0x44,0xff] +# NUMERIC: lw x10, -12(x8) # encoding: [0x03,0x25,0x44,0xff] +lw a0, -12(fp) + +# DEFAULT: sw a0, -12(s0) # encoding: [0x23,0x2a,0xa4,0xfe] +# EMIT-FP: sw a0, -12(fp) # encoding: [0x23,0x2a,0xa4,0xfe] +# NUMERIC: sw x10, -12(x8) # encoding: [0x23,0x2a,0xa4,0xfe] +sw a0, -12(x8) + +# DEFAULT: lw a0, -12(s0) # encoding: [0x03,0x25,0x44,0xff] +# EMIT-FP: lw a0, -12(fp) # encoding: [0x03,0x25,0x44,0xff] +# NUMERIC: lw x10, -12(x8) # encoding: [0x03,0x25,0x44,0xff] +lw a0, -12(x8) diff --git a/llvm/test/MC/RISCV/function-call-invalid.s b/llvm/test/MC/RISCV/function-call-invalid.s index 2b7a85245880..17d02015a694 100644 --- a/llvm/test/MC/RISCV/function-call-invalid.s +++ b/llvm/test/MC/RISCV/function-call-invalid.s @@ -10,3 +10,5 @@ call %lo(1234) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name call %hi(foo) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name call %lo(foo) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name call foo, bar # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name +call foo@pls # CHECK: :[[@LINE]]:10: error: @ (except the deprecated/ignored @plt) is disallowed +call foo@3 # CHECK: 
:[[@LINE]]:10: error: @ (except the deprecated/ignored @plt) is disallowed diff --git a/llvm/test/MC/RISCV/tail-call-invalid.s b/llvm/test/MC/RISCV/tail-call-invalid.s index 270d84df58ac..14ff996b2e4b 100644 --- a/llvm/test/MC/RISCV/tail-call-invalid.s +++ b/llvm/test/MC/RISCV/tail-call-invalid.s @@ -10,3 +10,4 @@ tail %hi(1234) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name tail %lo(1234) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name tail %hi(foo) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name tail %lo(foo) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name +tail foo@pls # CHECK: :[[@LINE]]:10: error: @ (except the deprecated/ignored @plt) is disallowed diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-inline-threshold.ll b/llvm/test/Other/new-pm-lto-prelink-samplepgo-inline-threshold.ll index 9baedcb02ca0..67af5f18e057 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-inline-threshold.ll +++ b/llvm/test/Other/new-pm-lto-prelink-samplepgo-inline-threshold.ll @@ -1,4 +1,4 @@ -; Tests that hot callsite threshold is set to 0 artifically for thinlto-prelink pipeline. +; Tests that hot callsite threshold is set to 0 artifically for thinlto-prelink and lto-pre-link pipeline. ; ; Function `sum` is annotated with inline cost -1 and function `sum1` is ; annotated with inline cost 0, by function attribute `function-inline-cost`. 
@@ -12,6 +12,9 @@ ; RUN: opt < %s -pass-remarks=inline -pass-remarks-missed=inline -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/new-pm-thinlto-prelink-samplepgo-inline-threshold.prof -S 2>&1 | FileCheck %s -check-prefix=REMARK +; RUN: opt < %s -pass-remarks=inline -pass-remarks-missed=inline -passes='lto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/new-pm-thinlto-prelink-samplepgo-inline-threshold.prof -S | FileCheck %s + +; RUN: opt < %s -pass-remarks=inline -pass-remarks-missed=inline -passes='lto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/new-pm-thinlto-prelink-samplepgo-inline-threshold.prof -S 2>&1 | FileCheck %s -check-prefix=REMARK ; Original C++ test case ; ; #include <stdio.h> diff --git a/llvm/test/Transforms/GVN/pr65447.ll b/llvm/test/Transforms/GVN/pr65447.ll index 1b951e907e82..1fa3811a3a81 100644 --- a/llvm/test/Transforms/GVN/pr65447.ll +++ b/llvm/test/Transforms/GVN/pr65447.ll @@ -2,29 +2,98 @@ ; RUN: opt -S -passes=gvn < %s | FileCheck %s ; Make sure deduplicated phi nodes are removed from the VN map. 
-define i64 @f() { -; CHECK-LABEL: define i64 @f() { +define i64 @f2(ptr %arg) { +; CHECK-LABEL: define i64 @f2( +; CHECK-SAME: ptr [[ARG:%.*]]) { +; CHECK-NEXT: BB: +; CHECK-NEXT: store i1 false, ptr [[ARG]], align 1 +; CHECK-NEXT: br label [[BB2D:%.*]] +; CHECK: BB2a: +; CHECK-NEXT: br label [[BB2B:%.*]] +; CHECK: BB2b: +; CHECK-NEXT: br label [[BB2C:%.*]] +; CHECK: BB2c: +; CHECK-NEXT: [[AZ2:%.*]] = phi i1 [ true, [[BB2B]] ], [ [[AZ:%.*]], [[BB2D]] ] +; CHECK-NEXT: [[DOTPHI_TRANS_INSERT:%.*]] = sext i1 [[AZ2]] to i64 +; CHECK-NEXT: [[GEP2_PHI_TRANS_INSERT:%.*]] = getelementptr i1, ptr [[ARG]], i64 [[DOTPHI_TRANS_INSERT]] +; CHECK-NEXT: [[L93_PRE:%.*]] = load i1, ptr [[GEP2_PHI_TRANS_INSERT]], align 1 +; CHECK-NEXT: br label [[BB2D]] +; CHECK: BB2d: +; CHECK-NEXT: [[AZ]] = phi i1 [ [[AZ2]], [[BB2C]] ], [ false, [[BB:%.*]] ] +; CHECK-NEXT: [[L93:%.*]] = phi i1 [ [[L93_PRE]], [[BB2C]] ], [ false, [[BB]] ] +; CHECK-NEXT: [[TMP0:%.*]] = sext i1 [[AZ]] to i64 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i1, ptr [[ARG]], i64 [[TMP0]] +; CHECK-NEXT: store i1 [[AZ]], ptr [[ARG]], align 2 +; CHECK-NEXT: br i1 [[L93]], label [[BB2C]], label [[BB1E:%.*]] +; CHECK: BB1e: +; CHECK-NEXT: br i1 [[AZ]], label [[BB2F:%.*]], label [[BB4:%.*]] +; CHECK: BB2f: +; CHECK-NEXT: store i1 true, ptr [[ARG]], align 2 +; CHECK-NEXT: br label [[BB2B]] +; CHECK: BB4: +; CHECK-NEXT: br label [[BB4]] +; +BB: + store i1 false, ptr %arg, align 1 + br label %BB2d + +BB2a: ; No predecessors! 
+ br label %BB2b + +BB2b: ; preds = %BB2f, %BB2a + br label %BB2c + +BB2c: ; preds = %BB2d, %BB2b + %0 = phi i1 [ true, %BB2b ], [ %1, %BB2d ] + br label %BB2d + +BB2d: ; preds = %BB2c, %BB + %1 = phi i1 [ %0, %BB2c ], [ false, %BB ] + %2 = sext i1 %1 to i64 + %gep2 = getelementptr i1, ptr %arg, i64 %2 + %L93 = load i1, ptr %gep2, align 1 + %Az = load i1, ptr %arg, align 2 + store i1 %1, ptr %arg, align 2 + br i1 %L93, label %BB2c, label %BB1e + +BB1e: ; preds = %BB2d + br i1 %Az, label %BB2f, label %BB4 + +BB2f: ; preds = %BB1e + store i1 true, ptr %arg, align 2 + br label %BB2b + +BB4: ; preds = %BB1e, %BB4 + br label %BB4 + +; uselistorder directives + uselistorder label %BB4, { 1, 0 } +} + +; Make sure deduplicated phi nodes are removed from the VN map. Make +; sure there is no assert on attempt to use ConstantData use lists. +define i64 @f_null() { +; CHECK-LABEL: define i64 @f_null() { ; CHECK-NEXT: BB: ; CHECK-NEXT: store i1 false, ptr null, align 1 ; CHECK-NEXT: br label [[BB2D:%.*]] ; CHECK: BB2a: ; CHECK-NEXT: br label [[BB2B:%.*]] ; CHECK: BB2b: -; CHECK-NEXT: [[L93_PRE_PRE:%.*]] = load i1, ptr inttoptr (i64 -1 to ptr), align 1 ; CHECK-NEXT: br label [[BB2C:%.*]] ; CHECK: BB2c: -; CHECK-NEXT: [[L93_PRE:%.*]] = phi i1 [ [[L93_PRE_PRE]], [[BB2B]] ], [ true, [[BB2D]] ] ; CHECK-NEXT: [[AZ2:%.*]] = phi i1 [ true, [[BB2B]] ], [ [[AZ:%.*]], [[BB2D]] ] ; CHECK-NEXT: [[DOTPHI_TRANS_INSERT:%.*]] = sext i1 [[AZ2]] to i64 ; CHECK-NEXT: [[GEP2_PHI_TRANS_INSERT:%.*]] = getelementptr i1, ptr null, i64 [[DOTPHI_TRANS_INSERT]] +; CHECK-NEXT: [[L93_PRE:%.*]] = load i1, ptr [[GEP2_PHI_TRANS_INSERT]], align 1 ; CHECK-NEXT: br label [[BB2D]] ; CHECK: BB2d: -; CHECK-NEXT: [[L93_PRE5:%.*]] = phi i1 [ [[L93_PRE]], [[BB2C]] ], [ false, [[BB:%.*]] ] -; CHECK-NEXT: [[AZ]] = phi i1 [ [[AZ2]], [[BB2C]] ], [ false, [[BB]] ] +; CHECK-NEXT: [[AZ]] = phi i1 [ [[AZ2]], [[BB2C]] ], [ false, [[BB:%.*]] ] +; CHECK-NEXT: [[L93:%.*]] = phi i1 [ [[L93_PRE]], [[BB2C]] ], [ false, [[BB]] ] ; 
CHECK-NEXT: [[TMP0:%.*]] = sext i1 [[AZ]] to i64 ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i1, ptr null, i64 [[TMP0]] ; CHECK-NEXT: store i1 [[AZ]], ptr null, align 2 -; CHECK-NEXT: br i1 [[L93_PRE5]], label [[BB2C]], label [[BB1E:%.*]] +; CHECK-NEXT: br i1 [[L93]], label [[BB2C]], label [[BB1E:%.*]] ; CHECK: BB1e: ; CHECK-NEXT: br i1 [[AZ]], label [[BB2F:%.*]], label [[BB4:%.*]] ; CHECK: BB2f: diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll index 7fb0fbdda0b5..f71aaa289b89 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll @@ -42,8 +42,7 @@ define <vscale x 8 x i16> @srshl_abs_positive_merge(<vscale x 8 x i16> %a, <vsca define <vscale x 8 x i16> @srshl_abs_all_active_pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg2) #0 { ; CHECK-LABEL: @srshl_abs_all_active_pred( -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[A:%.*]]) +; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A:%.*]]) ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> splat (i16 2)) ; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll index b8ea4de3d238..1c5f7464d858 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll 
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll @@ -5,8 +5,7 @@ target triple = "aarch64-unknown-linux-gnu" define <vscale x 8 x bfloat> @test_fcvt_bf16_f32_poison(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) { ; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvt_bf16_f32_poison( ; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> poison, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> poison, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]]) ; CHECK-NEXT: ret <vscale x 8 x bfloat> [[OUT]] ; %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -17,8 +16,7 @@ define <vscale x 8 x bfloat> @test_fcvt_bf16_f32_poison(<vscale x 8 x bfloat> %a define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) { ; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvt_bf16_f32( ; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]]) ; CHECK-NEXT: ret <vscale x 8 x bfloat> [[OUT]] ; %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -29,8 +27,7 @@ define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(<vscale x 8 x bfloat> %a, 
<vsca define <vscale x 8 x half> @test_fcvt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x float> %b) { ; CHECK-LABEL: define <vscale x 8 x half> @test_fcvt_f16_f32( ; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]]) ; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]] ; %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -41,8 +38,7 @@ define <vscale x 8 x half> @test_fcvt_f16_f32(<vscale x 8 x half> %a, <vscale x define <vscale x 8 x half> @test_fcvt_f16_f64(<vscale x 8 x half> %a, <vscale x 2 x double> %b) { ; CHECK-LABEL: define <vscale x 8 x half> @test_fcvt_f16_f64( ; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[B]]) ; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]] ; %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -53,8 +49,7 @@ define <vscale x 8 x half> @test_fcvt_f16_f64(<vscale x 8 x half> %a, <vscale x define <vscale x 4 x float> @test_fcvt_f32_f16(<vscale x 4 x float> %a, <vscale x 8 x half> %b) { ; CHECK-LABEL: define <vscale x 4 x float> @test_fcvt_f32_f16( ; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], 
<vscale x 8 x half> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> [[PG]], <vscale x 8 x half> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 8 x half> [[B]]) ; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]] ; %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -65,8 +60,7 @@ define <vscale x 4 x float> @test_fcvt_f32_f16(<vscale x 4 x float> %a, <vscale define <vscale x 4 x float> @test_fcvt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) { ; CHECK-LABEL: define <vscale x 4 x float> @test_fcvt_f32_f64( ; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[B]]) ; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]] ; %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -77,8 +71,7 @@ define <vscale x 4 x float> @test_fcvt_f32_f64(<vscale x 4 x float> %a, <vscale define <vscale x 2 x double> @test_fcvt_f64_f16(<vscale x 2 x double> %a, <vscale x 8 x half> %b) { ; CHECK-LABEL: define <vscale x 2 x double> @test_fcvt_f64_f16( ; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> 
@llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 8 x half> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 8 x half> [[B]]) ; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]] ; %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -89,8 +82,7 @@ define <vscale x 2 x double> @test_fcvt_f64_f16(<vscale x 2 x double> %a, <vscal define <vscale x 2 x double> @test_fcvt_f64_f32(<vscale x 2 x double> %a, <vscale x 4 x float> %b) { ; CHECK-LABEL: define <vscale x 2 x double> @test_fcvt_f64_f32( ; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x float> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 4 x float> [[B]]) ; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]] ; %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -101,8 +93,7 @@ define <vscale x 2 x double> @test_fcvt_f64_f32(<vscale x 2 x double> %a, <vscal define <vscale x 4 x float> @test_fcvtlt_f32_f16(<vscale x 4 x float> %a, <vscale x 8 x half> %b) { ; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtlt_f32_f16( ; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> [[PG]], <vscale x 8 x half> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> 
@llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 8 x half> [[B]]) ; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]] ; %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -113,8 +104,7 @@ define <vscale x 4 x float> @test_fcvtlt_f32_f16(<vscale x 4 x float> %a, <vscal define <vscale x 2 x double> @test_fcvtlt_f64_f32(<vscale x 2 x double> %a, <vscale x 4 x float> %b) { ; CHECK-LABEL: define <vscale x 2 x double> @test_fcvtlt_f64_f32( ; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x float> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 4 x float> [[B]]) ; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]] ; %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -125,8 +115,7 @@ define <vscale x 2 x double> @test_fcvtlt_f64_f32(<vscale x 2 x double> %a, <vsc define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) { ; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32( ; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[A]], <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[A]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]]) ; CHECK-NEXT: ret <vscale x 
8 x bfloat> [[OUT]] ; %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -137,8 +126,7 @@ define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vs define <vscale x 8 x half> @test_fcvtnt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x float> %b) { ; CHECK-LABEL: define <vscale x 8 x half> @test_fcvtnt_f16_f32( ; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> [[A]], <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> [[A]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]]) ; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]] ; %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -149,8 +137,7 @@ define <vscale x 8 x half> @test_fcvtnt_f16_f32(<vscale x 8 x half> %a, <vscale define <vscale x 4 x float> @test_fcvtnt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) { ; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtnt_f32_f64( ; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float> [[A]], <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float> [[A]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[B]]) ; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]] ; %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -161,8 +148,7 @@ define <vscale x 4 x float> @test_fcvtnt_f32_f64(<vscale x 4 x 
float> %a, <vscal define <vscale x 4 x float> @test_fcvtx_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) { ; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtx_f32_f64( ; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[B]]) ; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]] ; %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -173,8 +159,7 @@ define <vscale x 4 x float> @test_fcvtx_f32_f64(<vscale x 4 x float> %a, <vscale define <vscale x 4 x float> @test_fcvtxnt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) { ; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtxnt_f32_f64( ; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.f32f64(<vscale x 4 x float> [[A]], <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.f32f64(<vscale x 4 x float> [[A]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[B]]) ; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]] ; %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -185,8 +170,7 @@ define <vscale x 4 x float> @test_fcvtxnt_f32_f64(<vscale x 4 x float> %a, <vsca define <vscale x 8 x i16> @test_fcvtzs(<vscale x 8 x i16> %a, <vscale x 8 x half> %b) { ; CHECK-LABEL: define <vscale x 8 x i16> @test_fcvtzs( ; 
CHECK-SAME: <vscale x 8 x i16> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[PG]], <vscale x 8 x half> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[B]]) ; CHECK-NEXT: ret <vscale x 8 x i16> [[OUT]] ; %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) @@ -197,8 +181,7 @@ define <vscale x 8 x i16> @test_fcvtzs(<vscale x 8 x i16> %a, <vscale x 8 x half define <vscale x 4 x i32> @test_fcvtzs_i32_f16(<vscale x 4 x i32> %a, <vscale x 8 x half> %b) { ; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzs_i32_f16( ; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[PG]], <vscale x 8 x half> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 8 x half> [[B]]) ; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]] ; %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -209,8 +192,7 @@ define <vscale x 4 x i32> @test_fcvtzs_i32_f16(<vscale x 4 x i32> %a, <vscale x define <vscale x 4 x i32> @test_fcvtzs_i32_f64(<vscale x 4 x i32> %a, <vscale x 2 x double> %b) { ; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzs_i32_f64( ; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = 
call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[B]]) ; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]] ; %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -221,8 +203,7 @@ define <vscale x 4 x i32> @test_fcvtzs_i32_f64(<vscale x 4 x i32> %a, <vscale x define <vscale x 2 x i64> @test_fcvtzs_i64_f16(<vscale x 2 x i64> %a, <vscale x 8 x half> %b) { ; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzs_i64_f16( ; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[PG]], <vscale x 8 x half> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 8 x half> [[B]]) ; CHECK-NEXT: ret <vscale x 2 x i64> [[OUT]] ; %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -233,8 +214,7 @@ define <vscale x 2 x i64> @test_fcvtzs_i64_f16(<vscale x 2 x i64> %a, <vscale x define <vscale x 2 x i64> @test_fcvtzs_i64_f32(<vscale x 2 x i64> %a, <vscale x 4 x float> %b) { ; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzs_i64_f32( ; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x float> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x i64> 
@llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 4 x float> [[B]]) ; CHECK-NEXT: ret <vscale x 2 x i64> [[OUT]] ; %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -245,8 +225,7 @@ define <vscale x 2 x i64> @test_fcvtzs_i64_f32(<vscale x 2 x i64> %a, <vscale x define <vscale x 8 x i16> @test_fcvtzu(<vscale x 8 x i16> %a, <vscale x 8 x half> %b) { ; CHECK-LABEL: define <vscale x 8 x i16> @test_fcvtzu( ; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[PG]], <vscale x 8 x half> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[B]]) ; CHECK-NEXT: ret <vscale x 8 x i16> [[OUT]] ; %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) @@ -257,8 +236,7 @@ define <vscale x 8 x i16> @test_fcvtzu(<vscale x 8 x i16> %a, <vscale x 8 x half define <vscale x 4 x i32> @test_fcvtzu_i32_f16(<vscale x 4 x i32> %a, <vscale x 8 x half> %b) { ; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzu_i32_f16( ; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[PG]], <vscale x 8 x half> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 8 x half> [[B]]) ; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]] ; %pg = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -269,8 +247,7 @@ define <vscale x 4 x i32> @test_fcvtzu_i32_f16(<vscale x 4 x i32> %a, <vscale x define <vscale x 4 x i32> @test_fcvtzu_i32_f64(<vscale x 4 x i32> %a, <vscale x 2 x double> %b) { ; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzu_i32_f64( ; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[B]]) ; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]] ; %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -281,8 +258,7 @@ define <vscale x 4 x i32> @test_fcvtzu_i32_f64(<vscale x 4 x i32> %a, <vscale x define <vscale x 2 x i64> @test_fcvtzu_i64_f16(<vscale x 2 x i64> %a, <vscale x 8 x half> %b) { ; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzu_i64_f16( ; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[PG]], <vscale x 8 x half> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 8 x half> [[B]]) ; CHECK-NEXT: ret <vscale x 2 x i64> [[OUT]] ; %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -293,8 +269,7 @@ define <vscale x 2 x i64> @test_fcvtzu_i64_f16(<vscale x 2 x i64> %a, <vscale x define <vscale x 2 x i64> @test_fcvtzu_i64_f32(<vscale x 2 x i64> %a, 
<vscale x 4 x float> %b) { ; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzu_i64_f32( ; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x float> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 4 x float> [[B]]) ; CHECK-NEXT: ret <vscale x 2 x i64> [[OUT]] ; %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -305,8 +280,7 @@ define <vscale x 2 x i64> @test_fcvtzu_i64_f32(<vscale x 2 x i64> %a, <vscale x define <vscale x 8 x half> @test_scvtf(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) { ; CHECK-LABEL: define <vscale x 8 x half> @test_scvtf( ; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> undef, <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> undef, <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[B]]) ; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]] ; %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) @@ -317,8 +291,7 @@ define <vscale x 8 x half> @test_scvtf(<vscale x 8 x half> %a, <vscale x 8 x i16 define <vscale x 8 x half> @test_scvtf_f16_i32(<vscale x 8 x half> %a, <vscale x 4 x i32> %b) { ; CHECK-LABEL: define <vscale x 8 x half> @test_scvtf_f16_i32( ; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 
x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[B]]) ; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]] ; %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -329,8 +302,7 @@ define <vscale x 8 x half> @test_scvtf_f16_i32(<vscale x 8 x half> %a, <vscale x define <vscale x 8 x half> @test_scvtf_f16_i64(<vscale x 8 x half> %a,<vscale x 2 x i64> %b) { ; CHECK-LABEL: define <vscale x 8 x half> @test_scvtf_f16_i64( ; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[B]]) ; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]] ; %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -341,8 +313,7 @@ define <vscale x 8 x half> @test_scvtf_f16_i64(<vscale x 8 x half> %a,<vscale x define <vscale x 4 x float> @test_scvtf_f32_i64(<vscale x 4 x float> %a, <vscale x 2 x i64> %b) { ; CHECK-LABEL: define <vscale x 4 x float> @test_scvtf_f32_i64( ; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> [[B]]) +; 
CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[B]]) ; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]] ; %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -353,8 +324,7 @@ define <vscale x 4 x float> @test_scvtf_f32_i64(<vscale x 4 x float> %a, <vscale define <vscale x 2 x double> @test_scvtf_f64_i32(<vscale x 2 x double> %a, <vscale x 4 x i32> %b) { ; CHECK-LABEL: define <vscale x 2 x double> @test_scvtf_f64_i32( ; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 4 x i32> [[B]]) ; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]] ; %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -365,8 +335,7 @@ define <vscale x 2 x double> @test_scvtf_f64_i32(<vscale x 2 x double> %a, <vs define <vscale x 8 x half> @test_ucvtf(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) { ; CHECK-LABEL: define <vscale x 8 x half> @test_ucvtf( ; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> undef, <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> undef, <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[B]]) ; CHECK-NEXT: ret <vscale x 
8 x half> [[OUT]] ; %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) @@ -377,8 +346,7 @@ define <vscale x 8 x half> @test_ucvtf(<vscale x 8 x half> %a, <vscale x 8 x i16 define <vscale x 8 x half> @test_ucvtf_f16_i32(<vscale x 8 x half> %a, <vscale x 4 x i32> %b) { ; CHECK-LABEL: define <vscale x 8 x half> @test_ucvtf_f16_i32( ; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[B]]) ; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]] ; %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -389,8 +357,7 @@ define <vscale x 8 x half> @test_ucvtf_f16_i32(<vscale x 8 x half> %a, <vscale x define <vscale x 8 x half> @test_ucvtf_f16_i64(<vscale x 8 x half> %a,<vscale x 2 x i64> %b) { ; CHECK-LABEL: define <vscale x 8 x half> @test_ucvtf_f16_i64( ; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[B]]) ; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]] ; %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -401,8 +368,7 @@ define <vscale x 8 x half> @test_ucvtf_f16_i64(<vscale x 8 x half> %a,<vscale x define <vscale x 4 x float> 
@test_ucvtf_f32_i64(<vscale x 4 x float> %a, <vscale x 2 x i64> %b) { ; CHECK-LABEL: define <vscale x 4 x float> @test_ucvtf_f32_i64( ; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[B]]) ; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]] ; %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -413,8 +379,7 @@ define <vscale x 4 x float> @test_ucvtf_f32_i64(<vscale x 4 x float> %a, <vscale define <vscale x 2 x double> @test_ucvtf_f64_i32(<vscale x 2 x double> %a, <vscale x 4 x i32> %b) { ; CHECK-LABEL: define <vscale x 2 x double> @test_ucvtf_f64_i32( ; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) { -; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 4 x i32> [[B]]) ; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]] ; %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll index c67662f87250..d8d674029853 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll +++ 
b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll @@ -5,7 +5,7 @@ target triple = "aarch64-unknown-linux-gnu" define <vscale x 4 x i32> @combine_ld1(ptr %ptr) #0 { ; CHECK-LABEL: @combine_ld1( -; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 4 x i32>, ptr [[PTR:%.*]], align 16, !annotation !0 +; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 4 x i32>, ptr [[PTR:%.*]], align 16, !annotation [[META0:![0-9]+]] ; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -15,7 +15,7 @@ define <vscale x 4 x i32> @combine_ld1(ptr %ptr) #0 { define <vscale x 4 x i32> @combine_ld1_casted_predicate(ptr %ptr) #0 { ; CHECK-LABEL: @combine_ld1_casted_predicate( -; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 4 x i32>, ptr [[PTR:%.*]], align 16, !annotation !0 +; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 4 x i32>, ptr [[PTR:%.*]], align 16, !annotation [[META0]] ; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) @@ -28,7 +28,7 @@ define <vscale x 4 x i32> @combine_ld1_casted_predicate(ptr %ptr) #0 { define <vscale x 4 x i32> @combine_ld1_masked(ptr %ptr) #0 { ; CHECK-LABEL: @combine_ld1_masked( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16) -; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[PTR:%.*]], i32 1, <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> zeroinitializer), !annotation !0 +; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[PTR:%.*]], i32 1, <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> zeroinitializer), !annotation [[META0]] ; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16) @@ -38,10 +38,9 @@ define <vscale x 4 x i32> @combine_ld1_masked(ptr %ptr) #0 { define <vscale x 8 x i16> @combine_ld1_masked_casted_predicate(ptr %ptr) #0 { ; 
CHECK-LABEL: @combine_ld1_masked_casted_predicate( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP2]]) -; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[PTR:%.*]], i32 1, <vscale x 8 x i1> [[TMP3]], <vscale x 8 x i16> zeroinitializer), !annotation !0 +; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[PTR:%.*]], i32 1, <vscale x 8 x i1> [[TMP3]], <vscale x 8 x i16> zeroinitializer), !annotation [[META0]] ; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP5]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -53,7 +52,7 @@ define <vscale x 8 x i16> @combine_ld1_masked_casted_predicate(ptr %ptr) #0 { define void @combine_st1(<vscale x 4 x i32> %vec, ptr %ptr) #0 { ; CHECK-LABEL: @combine_st1( -; CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], ptr [[PTR:%.*]], align 16, !annotation !0 +; CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], ptr [[PTR:%.*]], align 16, !annotation [[META0]] ; CHECK-NEXT: ret void ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -63,7 +62,7 @@ define void @combine_st1(<vscale x 4 x i32> %vec, ptr %ptr) #0 { define void @combine_st1_casted_predicate(<vscale x 4 x i32> %vec, ptr %ptr) #0 { ; CHECK-LABEL: @combine_st1_casted_predicate( -; CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], ptr [[PTR:%.*]], align 16, !annotation !0 +; CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], ptr [[PTR:%.*]], align 16, !annotation [[META0]] ; CHECK-NEXT: ret void ; %1 = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.ptrue.nxv8i1(i32 31) @@ -76,7 +75,7 @@ define void @combine_st1_casted_predicate(<vscale x 4 x i32> %vec, ptr %ptr) #0 define void @combine_st1_masked(<vscale x 4 x i32> %vec, ptr %ptr) #0 { ; CHECK-LABEL: @combine_st1_masked( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16) -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[VEC:%.*]], ptr [[PTR:%.*]], i32 1, <vscale x 4 x i1> [[TMP1]]), !annotation !0 +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[VEC:%.*]], ptr [[PTR:%.*]], i32 1, <vscale x 4 x i1> [[TMP1]]), !annotation [[META0]] ; CHECK-NEXT: ret void ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16) @@ -86,10 +85,9 @@ define void @combine_st1_masked(<vscale x 4 x i32> %vec, ptr %ptr) #0 { define void @combine_st1_masked_casted_predicate(<vscale x 8 x i16> %vec, ptr %ptr) #0 { ; CHECK-LABEL: @combine_st1_masked_casted_predicate( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP2]]) -; CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[VEC:%.*]], ptr [[PTR:%.*]], i32 1, <vscale x 8 x i1> [[TMP3]]), !annotation !0 +; CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[VEC:%.*]], ptr [[PTR:%.*]], i32 1, <vscale x 8 x i1> [[TMP3]]), !annotation [[META0]] ; CHECK-NEXT: ret void ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll 
b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll index 6a2c0f8689ca..93de2e731606 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll @@ -22,8 +22,7 @@ define <vscale x 16 x i1> @dupq_b_0() #0 { define <vscale x 16 x i1> @dupq_b_d() #0 { ; CHECK-LABEL: define <vscale x 16 x i1> @dupq_b_d( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) @@ -38,8 +37,7 @@ define <vscale x 16 x i1> @dupq_b_d() #0 { define <vscale x 16 x i1> @dupq_b_w() #0 { ; CHECK-LABEL: define <vscale x 16 x i1> @dupq_b_w( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) @@ -54,8 +52,7 @@ define <vscale x 16 x i1> @dupq_b_w() #0 { define <vscale x 16 x i1> @dupq_b_h() #0 { ; CHECK-LABEL: define <vscale x 16 x i1> @dupq_b_h( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]]) +; CHECK-NEXT: 
[[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) @@ -70,8 +67,7 @@ define <vscale x 16 x i1> @dupq_b_h() #0 { define <vscale x 16 x i1> @dupq_b_b() #0 { ; CHECK-LABEL: define <vscale x 16 x i1> @dupq_b_b( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]] +; CHECK-NEXT: ret <vscale x 16 x i1> splat (i1 true) ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, @@ -101,9 +97,8 @@ define <vscale x 8 x i1> @dupq_h_0() #0 { define <vscale x 8 x i1> @dupq_h_d() #0 { ; CHECK-LABEL: define <vscale x 8 x i1> @dupq_h_d( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]]) -; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP2]]) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP1]]) ; CHECK-NEXT: ret <vscale x 8 x i1> [[TMP3]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) @@ -118,9 +113,8 @@ define <vscale x 8 x i1> @dupq_h_d() #0 { define <vscale x 8 x i1> @dupq_h_w() #0 { ; CHECK-LABEL: define <vscale x 8 x i1> @dupq_h_w( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] 
= call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]]) -; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP2]]) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP1]]) ; CHECK-NEXT: ret <vscale x 8 x i1> [[TMP3]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) @@ -135,8 +129,7 @@ define <vscale x 8 x i1> @dupq_h_w() #0 { define <vscale x 8 x i1> @dupq_h_h() #0 { ; CHECK-LABEL: define <vscale x 8 x i1> @dupq_h_h( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: ret <vscale x 8 x i1> [[TMP1]] +; CHECK-NEXT: ret <vscale x 8 x i1> splat (i1 true) ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, @@ -166,9 +159,8 @@ define <vscale x 4 x i1> @dupq_w_0() #0 { define <vscale x 4 x i1> @dupq_w_d() #0 { ; CHECK-LABEL: define <vscale x 4 x i1> @dupq_w_d( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]]) -; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP2]]) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP1]]) ; CHECK-NEXT: ret <vscale x 4 x i1> [[TMP3]] ; %1 = tail call 
<vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -183,8 +175,7 @@ define <vscale x 4 x i1> @dupq_w_d() #0 { define <vscale x 4 x i1> @dupq_w_w() #0 { ; CHECK-LABEL: define <vscale x 4 x i1> @dupq_w_w( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: ret <vscale x 4 x i1> [[TMP1]] +; CHECK-NEXT: ret <vscale x 4 x i1> splat (i1 true) ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, @@ -214,8 +205,7 @@ define <vscale x 2 x i1> @dupq_d_0() #0 { define <vscale x 2 x i1> @dupq_d_d() #0 { ; CHECK-LABEL: define <vscale x 2 x i1> @dupq_d_d( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: ret <vscale x 2 x i1> [[TMP1]] +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, @@ -231,10 +221,9 @@ define <vscale x 2 x i1> @dupq_d_d() #0 { define <vscale x 2 x i1> @dupq_neg1() #0 { ; CHECK-LABEL: define <vscale x 2 x i1> @dupq_neg1( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> <i64 1, i64 0>, i64 0) ; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) -; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> zeroinitializer) +; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x 
i1> splat (i1 true), <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> zeroinitializer) ; CHECK-NEXT: ret <vscale x 2 x i1> [[TMP4]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -249,10 +238,9 @@ define <vscale x 2 x i1> @dupq_neg1() #0 { define <vscale x 4 x i1> @dupq_neg2() #0 { ; CHECK-LABEL: define <vscale x 4 x i1> @dupq_neg2( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 1>, i64 0) ; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP2]], i64 0) -; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[TMP3]], <vscale x 2 x i64> zeroinitializer) +; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP3]], <vscale x 2 x i64> zeroinitializer) ; CHECK-NEXT: ret <vscale x 4 x i1> [[TMP4]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -267,10 +255,9 @@ define <vscale x 4 x i1> @dupq_neg2() #0 { define <vscale x 4 x i1> @dupq_neg3() #0 { ; CHECK-LABEL: define <vscale x 4 x i1> @dupq_neg3( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>, i64 0) ; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP2]], i64 0) -; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale 
x 4 x i32> [[TMP3]], <vscale x 2 x i64> zeroinitializer) +; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP3]], <vscale x 2 x i64> zeroinitializer) ; CHECK-NEXT: ret <vscale x 4 x i1> [[TMP4]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -285,10 +272,9 @@ define <vscale x 4 x i1> @dupq_neg3() #0 { define <vscale x 4 x i1> @dupq_neg4() #0 { ; CHECK-LABEL: define <vscale x 4 x i1> @dupq_neg4( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 0, i32 0>, i64 0) ; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP2]], i64 0) -; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[TMP3]], <vscale x 2 x i64> zeroinitializer) +; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP3]], <vscale x 2 x i64> zeroinitializer) ; CHECK-NEXT: ret <vscale x 4 x i1> [[TMP4]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -303,10 +289,9 @@ define <vscale x 4 x i1> @dupq_neg4() #0 { define <vscale x 4 x i1> @dupq_neg5() #0 { ; CHECK-LABEL: define <vscale x 4 x i1> @dupq_neg5( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>, i64 0) ; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> 
@llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP2]], i64 0) -; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[TMP3]], <vscale x 2 x i64> zeroinitializer) +; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP3]], <vscale x 2 x i64> zeroinitializer) ; CHECK-NEXT: ret <vscale x 4 x i1> [[TMP4]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -321,12 +306,11 @@ define <vscale x 4 x i1> @dupq_neg5() #0 { define <vscale x 4 x i1> @dupq_neg6(i1 %a) #0 { ; CHECK-LABEL: define <vscale x 4 x i1> @dupq_neg6( ; CHECK-SAME: i1 [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[A]] to i32 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> <i32 1, i32 1, i32 1, i32 poison>, i32 [[TMP2]], i64 3 ; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> [[TMP3]], i64 0) ; CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP4]], i64 0) -; CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[TMP5]], <vscale x 2 x i64> zeroinitializer) +; CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP5]], <vscale x 2 x i64> zeroinitializer) ; CHECK-NEXT: ret <vscale x 4 x i1> [[TMP6]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -342,10 +326,9 @@ define <vscale x 4 x i1> @dupq_neg6(i1 %a) #0 { define <vscale x 2 x i1> @dupq_neg7() #0 { ; CHECK-LABEL: define <vscale x 2 x i1> @dupq_neg7( ; CHECK-SAME: ) 
#[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> splat (i64 1), i64 2) ; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) -; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> zeroinitializer) +; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> zeroinitializer) ; CHECK-NEXT: ret <vscale x 2 x i1> [[TMP4]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -360,10 +343,9 @@ define <vscale x 2 x i1> @dupq_neg7() #0 { define <vscale x 2 x i1> @dupq_neg8() #0 { ; CHECK-LABEL: define <vscale x 2 x i1> @dupq_neg8( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> splat (i64 1), i64 0) ; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 1) -; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> zeroinitializer) +; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> zeroinitializer) ; CHECK-NEXT: ret <vscale x 2 x i1> [[TMP4]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -378,10 +360,9 @@ define <vscale x 2 x i1> @dupq_neg8() 
#0 { define <vscale x 2 x i1> @dupq_neg9(<vscale x 2 x i64> %x) #0 { ; CHECK-LABEL: define <vscale x 2 x i1> @dupq_neg9( ; CHECK-SAME: <vscale x 2 x i64> [[X:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[X]], <2 x i64> splat (i64 1), i64 0) ; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) -; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> zeroinitializer) +; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> zeroinitializer) ; CHECK-NEXT: ret <vscale x 2 x i1> [[TMP4]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -396,10 +377,9 @@ define <vscale x 2 x i1> @dupq_neg9(<vscale x 2 x i64> %x) #0 { define <vscale x 2 x i1> @dupq_neg10() #0 { ; CHECK-LABEL: define <vscale x 2 x i1> @dupq_neg10( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> splat (i64 1), i64 0) ; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) -; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> splat (i64 1)) +; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> splat 
(i64 1)) ; CHECK-NEXT: ret <vscale x 2 x i1> [[TMP4]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -448,10 +428,9 @@ define <vscale x 2 x i1> @dupq_neg12() #0 { define <vscale x 2 x i1> @dupq_neg13(<vscale x 2 x i64> %x) #0 { ; CHECK-LABEL: define <vscale x 2 x i1> @dupq_neg13( ; CHECK-SAME: <vscale x 2 x i64> [[X:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> splat (i64 1), i64 0) ; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) -; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[X]]) +; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[X]]) ; CHECK-NEXT: ret <vscale x 2 x i1> [[TMP4]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -465,10 +444,9 @@ define <vscale x 2 x i1> @dupq_neg13(<vscale x 2 x i64> %x) #0 { define <vscale x 16 x i1> @dupq_b_idx(i64 %idx) #0 { ; CHECK-LABEL: define <vscale x 16 x i1> @dupq_b_idx( ; CHECK-SAME: i64 [[IDX:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> zeroinitializer, i64 0) ; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP2]], i64 [[IDX]]) -; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[TMP3]], 
<vscale x 2 x i64> zeroinitializer) +; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[TMP3]], <vscale x 2 x i64> zeroinitializer) ; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP4]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-strictfp.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-strictfp.ll index f6f60d6d64e7..690bcd3dc033 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-strictfp.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-strictfp.ll @@ -8,8 +8,7 @@ target triple = "aarch64-unknown-linux-gnu" define <vscale x 2 x double> @replace_fadd_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { ; CHECK: Function Attrs: strictfp ; CHECK-LABEL: @replace_fadd_intrinsic_double_strictfp( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #[[ATTR2:[0-9]+]] -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #1 @@ -22,8 +21,7 @@ define <vscale x 2 x double> @replace_fadd_intrinsic_double_strictfp(<vscale x 2 define <vscale x 2 x double> @call_replace_fadd_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { ; CHECK: Function Attrs: strictfp ; CHECK-LABEL: @call_replace_fadd_intrinsic_double_strictfp( -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> 
@llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #[[ATTR2]] -; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]] ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] ; %1 = call <vscale x 2 x double> @replace_fadd_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 @@ -35,8 +33,7 @@ define <vscale x 2 x double> @call_replace_fadd_intrinsic_double_strictfp(<vscal define <vscale x 2 x double> @replace_fmul_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { ; CHECK: Function Attrs: strictfp ; CHECK-LABEL: @replace_fmul_intrinsic_double_strictfp( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #[[ATTR2]] -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]] ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #1 @@ -49,8 +46,7 @@ define <vscale x 2 x double> @replace_fmul_intrinsic_double_strictfp(<vscale x 2 define <vscale x 2 x double> @call_replace_fmul_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { ; CHECK: Function Attrs: strictfp ; CHECK-LABEL: @call_replace_fmul_intrinsic_double_strictfp( -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) 
#[[ATTR2]] -; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]] ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] ; %1 = call <vscale x 2 x double> @replace_fmul_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 @@ -62,8 +58,7 @@ define <vscale x 2 x double> @call_replace_fmul_intrinsic_double_strictfp(<vscal define <vscale x 2 x double> @replace_fsub_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { ; CHECK: Function Attrs: strictfp ; CHECK-LABEL: @replace_fsub_intrinsic_double_strictfp( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #[[ATTR2]] -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]] ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #1 @@ -76,8 +71,7 @@ define <vscale x 2 x double> @replace_fsub_intrinsic_double_strictfp(<vscale x 2 define <vscale x 2 x double> @call_replace_fsub_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { ; CHECK: Function Attrs: strictfp ; CHECK-LABEL: @call_replace_fsub_intrinsic_double_strictfp( -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #[[ATTR2]] -; CHECK-NEXT: [[TMP2:%.*]] = 
call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]] ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] ; %1 = call <vscale x 2 x double> @replace_fsub_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-combine-to-u-forms.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-combine-to-u-forms.ll index 60b2efe27168..bbf4b3c65c30 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-combine-to-u-forms.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-combine-to-u-forms.ll @@ -16,9 +16,8 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1>, <v define <vscale x 8 x half> @replace_fabd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x half> @replace_fabd_intrinsic_half ; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fabd.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fabd.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call fast <vscale x 8 x half> 
@llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b) @@ -29,9 +28,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1>, < define <vscale x 4 x float> @replace_fabd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @replace_fabd_intrinsic_float ; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fabd.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fabd.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b) @@ -42,9 +40,8 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1>, define <vscale x 2 x double> @replace_fabd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @replace_fabd_intrinsic_double ; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fabd.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] +; CHECK-NEXT: 
[[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fabd.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b) @@ -117,9 +114,8 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1>, <v define <vscale x 8 x half> @replace_fdiv_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x half> @replace_fdiv_intrinsic_half ; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fdiv.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fdiv.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b) @@ -130,9 +126,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1>, < define <vscale x 4 x float> @replace_fdiv_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @replace_fdiv_intrinsic_float ; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: 
[[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b) @@ -143,9 +138,8 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1>, define <vscale x 2 x double> @replace_fdiv_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @replace_fdiv_intrinsic_double ; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fdiv.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fdiv.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b) @@ -168,9 +162,8 @@ declare <vscale x 8 x 
half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1>, <v define <vscale x 8 x half> @replace_fmax_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x half> @replace_fmax_intrinsic_half ; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmax.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmax.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b) @@ -181,9 +174,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1>, < define <vscale x 4 x float> @replace_fmax_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @replace_fmax_intrinsic_float ; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmax.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmax.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> 
[[B]]) +; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b) @@ -194,9 +186,8 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1>, define <vscale x 2 x double> @replace_fmax_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @replace_fmax_intrinsic_double ; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmax.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmax.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b) @@ -219,9 +210,8 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1>, define <vscale x 8 x half> @replace_fmaxnm_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x half> @replace_fmaxnm_intrinsic_half ; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> 
@llvm.aarch64.sve.fmaxnm.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b) @@ -232,9 +222,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1>, define <vscale x 4 x float> @replace_fmaxnm_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @replace_fmaxnm_intrinsic_float ; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b) @@ -245,9 +234,8 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> define <vscale x 2 x double> @replace_fmaxnm_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x 
double> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @replace_fmaxnm_intrinsic_double ; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b) @@ -270,9 +258,8 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1>, <v define <vscale x 8 x half> @replace_fmin_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x half> @replace_fmin_intrinsic_half ; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmin.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmin.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 
= tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b) @@ -283,9 +270,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1>, < define <vscale x 4 x float> @replace_fmin_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @replace_fmin_intrinsic_float ; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmin.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmin.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b) @@ -296,9 +282,8 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1>, define <vscale x 2 x double> @replace_fmin_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @replace_fmin_intrinsic_double ; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmin.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) -; CHECK-NEXT: ret <vscale 
x 2 x double> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmin.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b) @@ -321,9 +306,8 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1>, define <vscale x 8 x half> @replace_fminnm_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x half> @replace_fminnm_intrinsic_half ; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fminnm.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fminnm.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b) @@ -334,9 +318,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1>, define <vscale x 4 x float> @replace_fminnm_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @replace_fminnm_intrinsic_float ; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x 
float> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fminnm.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fminnm.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b) @@ -347,9 +330,8 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> define <vscale x 2 x double> @replace_fminnm_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @replace_fminnm_intrinsic_double ; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fminnm.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fminnm.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale 
x 2 x double> %b) @@ -372,9 +354,8 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1>, <v define <vscale x 8 x half> @replace_fmla_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 { ; CHECK-LABEL: define <vscale x 8 x half> @replace_fmla_intrinsic_half ; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmla.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]], <vscale x 8 x half> [[C]]) -; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmla.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]], <vscale x 8 x half> [[C]]) +; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) @@ -385,9 +366,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1>, < define <vscale x 4 x float> @replace_fmla_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @replace_fmla_intrinsic_float ; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmla.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x 
float> [[B]], <vscale x 4 x float> [[C]]) -; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmla.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]]) +; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) @@ -398,9 +378,8 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1>, define <vscale x 2 x double> @replace_fmla_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @replace_fmla_intrinsic_double ; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmla.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]]) -; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmla.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]]) +; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) @@ -423,9 +402,8 @@ declare <vscale x 8 x half> 
@llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1>, <v define <vscale x 8 x half> @replace_fmls_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 { ; CHECK-LABEL: define <vscale x 8 x half> @replace_fmls_intrinsic_half ; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmls.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]], <vscale x 8 x half> [[C]]) -; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmls.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]], <vscale x 8 x half> [[C]]) +; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) @@ -436,9 +414,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1>, < define <vscale x 4 x float> @replace_fmls_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @replace_fmls_intrinsic_float ; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmls.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]]) -; CHECK-NEXT: ret 
<vscale x 4 x float> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmls.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]]) +; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) @@ -449,9 +426,8 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1>, define <vscale x 2 x double> @replace_fmls_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @replace_fmls_intrinsic_double ; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmls.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]]) -; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmls.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]]) +; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) @@ -524,9 +500,8 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fmulx.nxv8f16(<vscale x 8 x i1>, < define <vscale x 8 x half> 
@replace_fmulx_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x half> @replace_fmulx_intrinsic_half ; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmulx.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmulx.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmulx.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b) @@ -537,9 +512,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1>, define <vscale x 4 x float> @replace_fmulx_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @replace_fmulx_intrinsic_float ; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmulx.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmulx.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = tail call <vscale x 
4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b) @@ -550,9 +524,8 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1>, define <vscale x 2 x double> @replace_fmulx_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @replace_fmulx_intrinsic_double ; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmulx.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmulx.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b) @@ -575,9 +548,8 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fnmla.nxv8f16(<vscale x 8 x i1>, < define <vscale x 8 x half> @replace_fnmla_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 { ; CHECK-LABEL: define <vscale x 8 x half> @replace_fnmla_intrinsic_half ; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> 
@llvm.aarch64.sve.fnmla.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]], <vscale x 8 x half> [[C]]) -; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fnmla.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]], <vscale x 8 x half> [[C]]) +; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fnmla.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) @@ -588,9 +560,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1>, define <vscale x 4 x float> @replace_fnmla_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @replace_fnmla_intrinsic_float ; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmla.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]]) -; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmla.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]]) +; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> 
%c) @@ -601,9 +572,8 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1>, define <vscale x 2 x double> @replace_fnmla_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @replace_fnmla_intrinsic_double ; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmla.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]]) -; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmla.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]]) +; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) @@ -626,9 +596,8 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1>, < define <vscale x 8 x half> @replace_fnmls_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 { ; CHECK-LABEL: define <vscale x 8 x half> @replace_fnmls_intrinsic_half ; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fnmls.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 
x half> [[A]], <vscale x 8 x half> [[B]], <vscale x 8 x half> [[C]]) -; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fnmls.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]], <vscale x 8 x half> [[C]]) +; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) @@ -639,9 +608,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1>, define <vscale x 4 x float> @replace_fnmls_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @replace_fnmls_intrinsic_float ; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmls.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]]) -; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmls.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]]) +; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) @@ -652,9 +620,8 @@ declare <vscale x 2 x double> 
@llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1>, define <vscale x 2 x double> @replace_fnmls_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @replace_fnmls_intrinsic_double ; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmls.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]]) -; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmls.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]]) +; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) @@ -729,9 +696,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1>, <vs define <vscale x 16 x i8> @replace_add_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> @replace_add_intrinsic_i8 ; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call 
<vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) @@ -742,9 +708,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1>, <vsc define <vscale x 8 x i16> @replace_add_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @replace_add_intrinsic_i16 ; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) @@ -755,9 +720,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vsc define <vscale x 4 x i32> @replace_add_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_add_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call 
<vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) @@ -768,9 +732,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1>, <vsc define <vscale x 2 x i64> @replace_add_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_add_intrinsic_i64 ; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) @@ -793,9 +756,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1>, <vs define <vscale x 16 x i8> @replace_mla_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> 
@replace_mla_intrinsic_i8 ; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], <vscale x 16 x i8> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]], <vscale x 16 x i8> [[C]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]], <vscale x 16 x i8> [[C]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) @@ -806,9 +768,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1>, <vsc define <vscale x 8 x i16> @replace_mla_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @replace_mla_intrinsic_i16 ; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i16> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]], <vscale x 8 x i16> [[C]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]], <vscale x 8 x i16> [[C]]) +; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call 
<vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) @@ -819,9 +780,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1>, <vsc define <vscale x 4 x i32> @replace_mla_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_mla_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i32> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[C]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[C]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) @@ -832,9 +792,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1>, <vsc define <vscale x 2 x i64> @replace_mla_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_mla_intrinsic_i64 ; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]], <vscale x 2 x i64> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call 
<vscale x 2 x i64> @llvm.aarch64.sve.mla.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]], <vscale x 2 x i64> [[C]]) -; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]], <vscale x 2 x i64> [[C]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) @@ -857,9 +816,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1>, <vs define <vscale x 16 x i8> @replace_mls_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> @replace_mls_intrinsic_i8 ; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], <vscale x 16 x i8> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]], <vscale x 16 x i8> [[C]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]], <vscale x 16 x i8> [[C]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) @@ -870,9 +828,8 @@ declare <vscale x 8 x i16> 
@llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1>, <vsc define <vscale x 8 x i16> @replace_mls_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @replace_mls_intrinsic_i16 ; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i16> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]], <vscale x 8 x i16> [[C]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]], <vscale x 8 x i16> [[C]]) +; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) @@ -883,9 +840,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1>, <vsc define <vscale x 4 x i32> @replace_mls_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_mls_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i32> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[C]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x 
i32> @llvm.aarch64.sve.mls.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[C]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) @@ -896,9 +852,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1>, <vsc define <vscale x 2 x i64> @replace_mls_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_mls_intrinsic_i64 ; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]], <vscale x 2 x i64> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]], <vscale x 2 x i64> [[C]]) -; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]], <vscale x 2 x i64> [[C]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) @@ -921,9 +876,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1>, <vs define <vscale x 16 x i8> @replace_mul_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> @replace_mul_intrinsic_i8 ; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> 
[[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) @@ -934,9 +888,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1>, <vsc define <vscale x 8 x i16> @replace_mul_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @replace_mul_intrinsic_i16 ; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) @@ -947,9 +900,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vsc define <vscale 
x 4 x i32> @replace_mul_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_mul_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) @@ -960,9 +912,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1>, <vsc define <vscale x 2 x i64> @replace_mul_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_mul_intrinsic_i64 ; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> 
@llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) @@ -985,9 +936,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1>, <v define <vscale x 16 x i8> @replace_sabd_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> @replace_sabd_intrinsic_i8 ; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) @@ -998,9 +948,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1>, <vs define <vscale x 8 x i16> @replace_sabd_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @replace_sabd_intrinsic_i16 ; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.u.nxv8i16(<vscale x 8 x 
i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) @@ -1011,9 +960,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1>, <vs define <vscale x 4 x i32> @replace_sabd_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_sabd_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) @@ -1024,9 +972,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1>, <vs define <vscale x 2 x i64> @replace_sabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_sabd_intrinsic_i64 ; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> 
@llvm.aarch64.sve.sabd.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) @@ -1049,9 +996,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1>, <v define <vscale x 16 x i8> @replace_smax_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> @replace_smax_intrinsic_i8 ; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smax.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smax.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) @@ -1062,9 +1008,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1>, <vs define <vscale x 8 x i16> @replace_smax_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @replace_smax_intrinsic_i16 ; 
CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) @@ -1075,9 +1020,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1>, <vs define <vscale x 4 x i32> @replace_smax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_smax_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) @@ -1088,9 +1032,8 @@ declare <vscale x 2 x i64> 
@llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1>, <vs define <vscale x 2 x i64> @replace_smax_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_smax_intrinsic_i64 ; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) @@ -1113,9 +1056,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1>, <v define <vscale x 16 x i8> @replace_smin_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> @replace_smin_intrinsic_i8 ; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smin.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smin.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail call <vscale 
x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) @@ -1126,9 +1068,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1>, <vs define <vscale x 8 x i16> @replace_smin_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @replace_smin_intrinsic_i16 ; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) @@ -1139,9 +1080,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1>, <vs define <vscale x 4 x i32> @replace_smin_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_smin_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: 
[[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) @@ -1152,9 +1092,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1>, <vs define <vscale x 2 x i64> @replace_smin_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_smin_intrinsic_i64 ; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) @@ -1177,9 +1116,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1>, < define <vscale x 16 x i8> @replace_smulh_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> @replace_smulh_intrinsic_i8 ; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; 
CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) @@ -1190,9 +1128,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1>, <v define <vscale x 8 x i16> @replace_smulh_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @replace_smulh_intrinsic_i16 ; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) @@ -1203,9 +1140,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1>, <v define <vscale x 4 x i32> @replace_smulh_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: 
define <vscale x 4 x i32> @replace_smulh_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) @@ -1216,9 +1152,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1>, <v define <vscale x 2 x i64> @replace_smulh_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_smulh_intrinsic_i64 ; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale 
x 2 x i64> %b) @@ -1241,9 +1176,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1>, <vs define <vscale x 16 x i8> @replace_sub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> @replace_sub_intrinsic_i8 ; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) @@ -1254,9 +1188,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1>, <vsc define <vscale x 8 x i16> @replace_sub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @replace_sub_intrinsic_i16 ; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) +; CHECK-NEXT: 
ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) @@ -1267,9 +1200,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vsc define <vscale x 4 x i32> @replace_sub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_sub_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) @@ -1280,9 +1212,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1>, <vsc define <vscale x 2 x i64> @replace_sub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_sub_intrinsic_i64 ; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) -; CHECK-NEXT: ret 
<vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) @@ -1305,9 +1236,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1>, <v define <vscale x 16 x i8> @replace_uabd_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> @replace_uabd_intrinsic_i8 ; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) @@ -1318,9 +1248,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1>, <vs define <vscale x 8 x i16> @replace_uabd_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @replace_uabd_intrinsic_i16 ; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x 
i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) @@ -1331,9 +1260,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1>, <vs define <vscale x 4 x i32> @replace_uabd_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_uabd_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) @@ -1344,9 +1272,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1>, <vs define <vscale x 2 x i64> @replace_uabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 
2 x i64> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_uabd_intrinsic_i64 ; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) @@ -1369,9 +1296,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1>, <v define <vscale x 16 x i8> @replace_umax_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> @replace_umax_intrinsic_i8 ; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umax.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umax.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1> %1, 
<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) @@ -1382,9 +1308,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1>, <vs define <vscale x 8 x i16> @replace_umax_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @replace_umax_intrinsic_i16 ; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) @@ -1395,9 +1320,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1>, <vs define <vscale x 4 x i32> @replace_umax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_umax_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], 
<vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) @@ -1408,9 +1332,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1>, <vs define <vscale x 2 x i64> @replace_umax_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_umax_intrinsic_i64 ; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) @@ -1433,9 +1356,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1>, <v define <vscale x 16 x i8> @replace_umin_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> @replace_umin_intrinsic_i8 ; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umin.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> 
[[A]], <vscale x 16 x i8> [[B]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umin.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) @@ -1446,9 +1368,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1>, <vs define <vscale x 8 x i16> @replace_umin_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @replace_umin_intrinsic_i16 ; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) @@ -1459,9 +1380,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1>, <vs define <vscale x 4 x i32> @replace_umin_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_umin_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; 
CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) @@ -1472,9 +1392,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1>, <vs define <vscale x 2 x i64> @replace_umin_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_umin_intrinsic_i64 ; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) @@ -1497,9 +1416,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1>, < define <vscale x 16 x i8> 
@replace_umulh_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> @replace_umulh_intrinsic_i8 ; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) @@ -1510,9 +1428,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1>, <v define <vscale x 8 x i16> @replace_umulh_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @replace_umulh_intrinsic_i16 ; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call 
<vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) @@ -1523,9 +1440,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1>, <v define <vscale x 4 x i32> @replace_umulh_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_umulh_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) @@ -1536,9 +1452,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1>, <v define <vscale x 2 x i64> @replace_umulh_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_umulh_intrinsic_i64 ; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> 
@llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) @@ -1563,9 +1478,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vs define <vscale x 16 x i8> @replace_asr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> @replace_asr_intrinsic_i8 ; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) @@ -1576,9 +1490,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vsc define <vscale x 8 x i16> @replace_asr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @replace_asr_intrinsic_i16 ; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 
8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) @@ -1589,9 +1502,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vsc define <vscale x 4 x i32> @replace_asr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_asr_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) @@ -1602,9 +1514,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vsc define <vscale x 2 x i64> @replace_asr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_asr_intrinsic_i64 ; CHECK-SAME: 
(<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) @@ -1627,9 +1538,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vs define <vscale x 16 x i8> @replace_lsl_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> @replace_lsl_intrinsic_i8 ; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) @@ -1640,9 +1550,8 @@ declare <vscale x 8 x i16> 
@llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vsc define <vscale x 8 x i16> @replace_lsl_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @replace_lsl_intrinsic_i16 ; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) @@ -1653,9 +1562,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vsc define <vscale x 4 x i32> @replace_lsl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_lsl_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) @@ -1666,9 +1574,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vsc define <vscale x 2 x i64> @replace_lsl_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_lsl_intrinsic_i64 ; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) @@ -1691,9 +1598,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vs define <vscale x 16 x i8> @replace_lsr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> @replace_lsr_intrinsic_i8 ; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call 
<vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) @@ -1704,9 +1610,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vsc define <vscale x 8 x i16> @replace_lsr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @replace_lsr_intrinsic_i16 ; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) @@ -1717,9 +1622,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vsc define <vscale x 4 x i32> @replace_lsr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_lsr_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail 
call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) @@ -1730,9 +1634,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vsc define <vscale x 2 x i64> @replace_lsr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_lsr_intrinsic_i64 ; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) @@ -1757,9 +1660,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1>, <vs define <vscale x 16 x i8> @replace_and_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> @replace_and_intrinsic_i8 ; 
CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) @@ -1770,9 +1672,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1>, <vsc define <vscale x 8 x i16> @replace_and_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @replace_and_intrinsic_i16 ; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) @@ -1783,9 +1684,8 @@ declare <vscale x 4 x i32> 
@llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>, <vsc define <vscale x 4 x i32> @replace_and_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_and_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) @@ -1796,9 +1696,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1>, <vsc define <vscale x 2 x i64> @replace_and_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_and_intrinsic_i64 ; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) @@ -1821,9 +1720,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1>, <vs define <vscale x 16 x i8> @replace_bic_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> @replace_bic_intrinsic_i8 ; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) @@ -1834,9 +1732,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1>, <vsc define <vscale x 8 x i16> @replace_bic_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @replace_bic_intrinsic_i16 ; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail 
call <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) @@ -1847,9 +1744,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1>, <vsc define <vscale x 4 x i32> @replace_bic_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_bic_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) @@ -1860,9 +1756,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1>, <vsc define <vscale x 2 x i64> @replace_bic_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_bic_intrinsic_i64 ; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail 
call <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) @@ -1885,9 +1780,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1>, <vs define <vscale x 16 x i8> @replace_eor_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> @replace_eor_intrinsic_i8 ; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) @@ -1898,9 +1792,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1>, <vsc define <vscale x 8 x i16> @replace_eor_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> 
@replace_eor_intrinsic_i16 ; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) @@ -1911,9 +1804,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vsc define <vscale x 4 x i32> @replace_eor_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_eor_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) @@ -1924,9 +1816,8 @@ declare 
<vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1>, <vsc define <vscale x 2 x i64> @replace_eor_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_eor_intrinsic_i64 ; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) @@ -1949,9 +1840,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1>, <vs define <vscale x 16 x i8> @replace_orr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> @replace_orr_intrinsic_i8 ; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail 
call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) @@ -1962,9 +1852,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1>, <vsc define <vscale x 8 x i16> @replace_orr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @replace_orr_intrinsic_i16 ; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) @@ -1975,9 +1864,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vsc define <vscale x 4 x i32> @replace_orr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_orr_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: 
[[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) @@ -1988,9 +1876,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1>, <vsc define <vscale x 2 x i64> @replace_orr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_orr_intrinsic_i64 ; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) @@ -2015,9 +1902,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1>, < define <vscale x 16 x i8> @replace_sqsub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> @replace_sqsub_intrinsic_i8 ; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; 
CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) @@ -2028,9 +1914,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1>, <v define <vscale x 8 x i16> @replace_sqsub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @replace_sqsub_intrinsic_i16 ; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) +; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) @@ -2041,9 +1926,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1>, <v define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: 
define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) @@ -2054,9 +1938,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1>, <v define <vscale x 2 x i64> @replace_sqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_sqsub_intrinsic_i64 ; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale 
x 2 x i64> %b) @@ -2079,9 +1962,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1>, < define <vscale x 16 x i8> @replace_uqsub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 { ; CHECK-LABEL: define <vscale x 16 x i8> @replace_uqsub_intrinsic_i8 ; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) -; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]]) +; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]] ; %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) @@ -2092,9 +1974,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1>, <v define <vscale x 8 x i16> @replace_uqsub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @replace_uqsub_intrinsic_i16 ; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]]) -; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> 
[[B]]) +; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) @@ -2105,9 +1986,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1>, <v define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32 ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) @@ -2118,9 +1998,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1>, <v define <vscale x 2 x i64> @replace_uqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @replace_uqsub_intrinsic_i64 ; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale 
x 2 x i64> [[B]]) -; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]]) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll index a22454b586c2..e00ad68bdfac 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll @@ -39,8 +39,7 @@ define i1 @ptest_any1(<vscale x 2 x i1> %a) #0 { ; No transform because the ptest is using differently sized operands. define i1 @ptest_any2(<vscale x 4 x i1> %a) #0 { ; CHECK-LABEL: @ptest_any2( -; CHECK-NEXT: [[MASK:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[MASK]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[A:%.*]]) ; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i1> [[TMP2]]) ; CHECK-NEXT: ret i1 [[OUT]] diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-rdffr-predication.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-rdffr-predication.ll index 155102db52b5..ff728dc47c66 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-rdffr-predication.ll +++ 
b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-rdffr-predication.ll @@ -6,8 +6,7 @@ target triple = "aarch64-unknown-linux-gnu" ; Test that rdffr is substituted with predicated form which enables ptest optimization later. define <vscale x 16 x i1> @predicate_rdffr() #0 { ; CHECK-LABEL: @predicate_rdffr( -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[TMP1]]) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 16 x i1> [[OUT]] ; %out = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr() diff --git a/llvm/test/Transforms/InstCombine/array.ll b/llvm/test/Transforms/InstCombine/array.ll index 3edb47dda62c..a09b66273368 100644 --- a/llvm/test/Transforms/InstCombine/array.ll +++ b/llvm/test/Transforms/InstCombine/array.ll @@ -109,6 +109,58 @@ entry: ret void } +; FIXME: Should be transformed as OR+GEP -> GEP+GEP (similar to gep_inbounds_add_nuw below). +define ptr @gep_inbounds_nuwaddlike(ptr %ptr, i64 %a, i64 %b) { +; CHECK-LABEL: define ptr @gep_inbounds_nuwaddlike( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: [[ADD:%.*]] = or disjoint i64 [[A]], [[B]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[PTR]], i64 [[ADD]] +; CHECK-NEXT: ret ptr [[GEP]] +; + %add = or disjoint i64 %a, %b + %gep = getelementptr inbounds nuw i32, ptr %ptr, i64 %add + ret ptr %gep +} + +; FIXME: Preserve "inbounds nuw". 
+define ptr @gep_inbounds_add_nuw(ptr %ptr, i64 %a, i64 %b) { +; CHECK-LABEL: define ptr @gep_inbounds_add_nuw( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[A]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[B]] +; CHECK-NEXT: ret ptr [[GEP]] +; + %add = add nuw i64 %a, %b + %gep = getelementptr inbounds nuw i32, ptr %ptr, i64 %add + ret ptr %gep +} + +; FIXME: Preserve "nusw nuw". +define ptr @gep_inbounds_add_nusw_nuw(ptr %ptr, i64 %a, i64 %b) { +; CHECK-LABEL: define ptr @gep_inbounds_add_nusw_nuw( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[A]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[B]] +; CHECK-NEXT: ret ptr [[GEP]] +; + %add = add nuw i64 %a, %b + %gep = getelementptr nusw nuw i32, ptr %ptr, i64 %add + ret ptr %gep +} + +; FIXME: Preserve "nuw". +define ptr @gep_add_nuw(ptr %ptr, i64 %a, i64 %b) { +; CHECK-LABEL: define ptr @gep_add_nuw( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[A]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[B]] +; CHECK-NEXT: ret ptr [[GEP]] +; + %add = add nuw i64 %a, %b + %gep = getelementptr nuw i32, ptr %ptr, i64 %add + ret ptr %gep +} + define ptr @gep_inbounds_add_nsw_nonneg(ptr %ptr, i64 %a, i64 %b) { ; CHECK-LABEL: define ptr @gep_inbounds_add_nsw_nonneg( ; CHECK-SAME: ptr [[PTR:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { @@ -219,6 +271,27 @@ define ptr @gep_inbounds_sext_add_nonneg(ptr %ptr, i32 %a) { ret ptr %gep } +; FIXME: Could be optimized similar to gep_inbounds_sext_add_nonneg above +; (difference is that we are using disjoint OR which is canonical form +; of ADD with disjoint operands). 
+define ptr @gep_inbounds_sext_addlike_nonneg(ptr %ptr, i32 %a) { +; CHECK-LABEL: define ptr @gep_inbounds_sext_addlike_nonneg( +; CHECK-SAME: ptr [[PTR:%.*]], i32 [[A:%.*]]) { +; CHECK-NEXT: [[A_NNEG:%.*]] = icmp sgt i32 [[A]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[A_NNEG]]) +; CHECK-NEXT: [[ADD:%.*]] = or disjoint i32 [[A]], 10 +; CHECK-NEXT: [[IDX:%.*]] = zext nneg i32 [[ADD]] to i64 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[PTR]], i64 [[IDX]] +; CHECK-NEXT: ret ptr [[GEP]] +; + %a.nneg = icmp sgt i32 %a, -1 + call void @llvm.assume(i1 %a.nneg) + %add = or disjoint i32 %a, 10 + %idx = sext i32 %add to i64 + %gep = getelementptr inbounds i32, ptr %ptr, i64 %idx + ret ptr %gep +} + define ptr @gep_inbounds_sext_add_not_nonneg_1(ptr %ptr, i32 %a) { ; CHECK-LABEL: define ptr @gep_inbounds_sext_add_not_nonneg_1( ; CHECK-SAME: ptr [[PTR:%.*]], i32 [[A:%.*]]) { diff --git a/llvm/test/Transforms/InstCombine/fabs-as-int.ll b/llvm/test/Transforms/InstCombine/fabs-as-int.ll index 9d28cae8f04d..f0e83ca6302f 100644 --- a/llvm/test/Transforms/InstCombine/fabs-as-int.ll +++ b/llvm/test/Transforms/InstCombine/fabs-as-int.ll @@ -289,8 +289,8 @@ define i128 @fabs_as_int_ppc_fp128_f64_mask(ppc_fp128 %x) { define i128 @fabs_as_int_ppc_fp128_f128_mask(ppc_fp128 %x) { ; CHECK-LABEL: define i128 @fabs_as_int_ppc_fp128_f128_mask ; CHECK-SAME: (ppc_fp128 [[X:%.*]]) { -; CHECK-NEXT: [[BC:%.*]] = bitcast ppc_fp128 [[X]] to i128 -; CHECK-NEXT: [[AND:%.*]] = and i128 [[BC]], 170141183460469231731687303715884105727 +; CHECK-NEXT: [[TMP1:%.*]] = call ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128 [[X]]) +; CHECK-NEXT: [[AND:%.*]] = bitcast ppc_fp128 [[TMP1]] to i128 ; CHECK-NEXT: ret i128 [[AND]] ; %bc = bitcast ppc_fp128 %x to i128 diff --git a/llvm/test/Transforms/InstCombine/fneg-as-int.ll b/llvm/test/Transforms/InstCombine/fneg-as-int.ll index f8d88b4f238f..590aca687e5b 100644 --- a/llvm/test/Transforms/InstCombine/fneg-as-int.ll +++ 
b/llvm/test/Transforms/InstCombine/fneg-as-int.ll @@ -291,8 +291,8 @@ define i128 @fneg_as_int_ppc_fp128_f64_mask(ppc_fp128 %x) { define i128 @fneg_as_int_ppc_fp128_f128_mask(ppc_fp128 %x) { ; CHECK-LABEL: define i128 @fneg_as_int_ppc_fp128_f128_mask ; CHECK-SAME: (ppc_fp128 [[X:%.*]]) { -; CHECK-NEXT: [[BC:%.*]] = bitcast ppc_fp128 [[X]] to i128 -; CHECK-NEXT: [[XOR:%.*]] = xor i128 [[BC]], -170141183460469231731687303715884105728 +; CHECK-NEXT: [[TMP1:%.*]] = fneg ppc_fp128 [[X]] +; CHECK-NEXT: [[XOR:%.*]] = bitcast ppc_fp128 [[TMP1]] to i128 ; CHECK-NEXT: ret i128 [[XOR]] ; %bc = bitcast ppc_fp128 %x to i128 diff --git a/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll b/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll index 8b245bdd7929..a0894c3febc9 100644 --- a/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll +++ b/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll @@ -317,8 +317,9 @@ define i128 @fneg_fabs_as_int_ppc_fp128_f64_mask(ppc_fp128 %x) { define i128 @fneg_fabs_as_int_ppc_fp128_f128_mask(ppc_fp128 %x) { ; CHECK-LABEL: define i128 @fneg_fabs_as_int_ppc_fp128_f128_mask ; CHECK-SAME: (ppc_fp128 [[X:%.*]]) { -; CHECK-NEXT: [[BC:%.*]] = bitcast ppc_fp128 [[X]] to i128 -; CHECK-NEXT: [[OR:%.*]] = or i128 [[BC]], -170141183460469231731687303715884105728 +; CHECK-NEXT: [[TMP1:%.*]] = call ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128 [[X]]) +; CHECK-NEXT: [[TMP2:%.*]] = fneg ppc_fp128 [[TMP1]] +; CHECK-NEXT: [[OR:%.*]] = bitcast ppc_fp128 [[TMP2]] to i128 ; CHECK-NEXT: ret i128 [[OR]] ; %bc = bitcast ppc_fp128 %x to i128 diff --git a/llvm/test/Transforms/InstCombine/sincospi.ll b/llvm/test/Transforms/InstCombine/sincospi.ll index b76ae2017114..14da03dff6f4 100644 --- a/llvm/test/Transforms/InstCombine/sincospi.ll +++ b/llvm/test/Transforms/InstCombine/sincospi.ll @@ -90,18 +90,14 @@ define float @test_instbased_f32_other_user(ptr %ptr) { define float @test_constant_f32() { ; CHECK-FLOAT-IN-VEC-LABEL: @test_constant_f32( -; CHECK-FLOAT-IN-VEC-NEXT: 
[[SINCOSPI:%.*]] = call <2 x float> @__sincospif_stret(float 1.000000e+00) -; CHECK-FLOAT-IN-VEC-NEXT: [[SINPI:%.*]] = extractelement <2 x float> [[SINCOSPI]], i64 0 -; CHECK-FLOAT-IN-VEC-NEXT: [[COSPI:%.*]] = extractelement <2 x float> [[SINCOSPI]], i64 1 -; CHECK-FLOAT-IN-VEC-NEXT: [[COS:%.*]] = call float @__cospif(float 1.000000e+00) #[[ATTR0]] +; CHECK-FLOAT-IN-VEC-NEXT: [[SINPI:%.*]] = call float @__sinpif(float 1.000000e+00) #[[ATTR0]] +; CHECK-FLOAT-IN-VEC-NEXT: [[COSPI:%.*]] = call float @__cospif(float 1.000000e+00) #[[ATTR0]] ; CHECK-FLOAT-IN-VEC-NEXT: [[RES:%.*]] = fadd float [[SINPI]], [[COSPI]] ; CHECK-FLOAT-IN-VEC-NEXT: ret float [[RES]] ; ; CHECK-LABEL: @test_constant_f32( -; CHECK-NEXT: [[SINCOSPI:%.*]] = call { float, float } @__sincospif_stret(float 1.000000e+00) -; CHECK-NEXT: [[SINPI:%.*]] = extractvalue { float, float } [[SINCOSPI]], 0 -; CHECK-NEXT: [[COSPI:%.*]] = extractvalue { float, float } [[SINCOSPI]], 1 -; CHECK-NEXT: [[COS:%.*]] = call float @__cospif(float 1.000000e+00) #[[ATTR0]] +; CHECK-NEXT: [[SINPI:%.*]] = call float @__sinpif(float 1.000000e+00) #[[ATTR0]] +; CHECK-NEXT: [[COSPI:%.*]] = call float @__cospif(float 1.000000e+00) #[[ATTR0]] ; CHECK-NEXT: [[RES:%.*]] = fadd float [[SINPI]], [[COSPI]] ; CHECK-NEXT: ret float [[RES]] ; @@ -172,18 +168,14 @@ define double @test_instbased_f64() { define double @test_constant_f64() { ; CHECK-FLOAT-IN-VEC-LABEL: @test_constant_f64( -; CHECK-FLOAT-IN-VEC-NEXT: [[SINCOSPI:%.*]] = call { double, double } @__sincospi_stret(double 1.000000e+00) -; CHECK-FLOAT-IN-VEC-NEXT: [[SINPI:%.*]] = extractvalue { double, double } [[SINCOSPI]], 0 -; CHECK-FLOAT-IN-VEC-NEXT: [[COSPI:%.*]] = extractvalue { double, double } [[SINCOSPI]], 1 -; CHECK-FLOAT-IN-VEC-NEXT: [[COS:%.*]] = call double @__cospi(double 1.000000e+00) #[[ATTR0]] +; CHECK-FLOAT-IN-VEC-NEXT: [[SINPI:%.*]] = call double @__sinpi(double 1.000000e+00) #[[ATTR0]] +; CHECK-FLOAT-IN-VEC-NEXT: [[COSPI:%.*]] = call double @__cospi(double 
1.000000e+00) #[[ATTR0]] ; CHECK-FLOAT-IN-VEC-NEXT: [[RES:%.*]] = fadd double [[SINPI]], [[COSPI]] ; CHECK-FLOAT-IN-VEC-NEXT: ret double [[RES]] ; ; CHECK-LABEL: @test_constant_f64( -; CHECK-NEXT: [[SINCOSPI:%.*]] = call { double, double } @__sincospi_stret(double 1.000000e+00) -; CHECK-NEXT: [[SINPI:%.*]] = extractvalue { double, double } [[SINCOSPI]], 0 -; CHECK-NEXT: [[COSPI:%.*]] = extractvalue { double, double } [[SINCOSPI]], 1 -; CHECK-NEXT: [[COS:%.*]] = call double @__cospi(double 1.000000e+00) #[[ATTR0]] +; CHECK-NEXT: [[SINPI:%.*]] = call double @__sinpi(double 1.000000e+00) #[[ATTR0]] +; CHECK-NEXT: [[COSPI:%.*]] = call double @__cospi(double 1.000000e+00) #[[ATTR0]] ; CHECK-NEXT: [[RES:%.*]] = fadd double [[SINPI]], [[COSPI]] ; CHECK-NEXT: ret double [[RES]] ; diff --git a/llvm/test/Transforms/LICM/pr50367.ll b/llvm/test/Transforms/LICM/pr50367.ll index a7cf21deff62..7fd176b6c6bb 100644 --- a/llvm/test/Transforms/LICM/pr50367.ll +++ b/llvm/test/Transforms/LICM/pr50367.ll @@ -2,7 +2,7 @@ ; RUN: opt -S -passes='loop-mssa(licm)' < %s | FileCheck %s @e = external dso_local global ptr, align 8 -define void @main(i1 %arg) { +define void @main(i1 %arg, ptr %arg1) { ; CHECK-LABEL: @main( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP1:%.*]] @@ -11,8 +11,47 @@ define void @main(i1 %arg) { ; CHECK: loop2: ; CHECK-NEXT: br i1 [[ARG:%.*]], label [[LOOP2_LATCH:%.*]], label [[LOOP_LATCH:%.*]] ; CHECK: loop2.latch: +; CHECK-NEXT: store i32 0, ptr [[ARG1:%.*]], align 4 ; CHECK-NEXT: br label [[LOOP2]] ; CHECK: loop.latch: +; CHECK-NEXT: store ptr null, ptr @e, align 8, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[PTR:%.*]] = load ptr, ptr @e, align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: store i32 0, ptr [[PTR]], align 4, !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: br label [[LOOP1]] +; +entry: + br label %loop1 + +loop1: + br label %loop2 + +loop2: + br i1 %arg, label %loop2.latch, label %loop.latch + +loop2.latch: + store i32 0, ptr %arg1, align 4 + br label %loop2 + 
+loop.latch: + store ptr null, ptr @e, align 8, !tbaa !0 + %ptr = load ptr, ptr @e, align 8, !tbaa !0 + store i32 0, ptr %ptr, align 4, !tbaa !4 + br label %loop1 +} + +define void @store_null(i1 %arg) { +; CHECK-LABEL: @store_null( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP1:%.*]] +; CHECK: loop1: +; CHECK-NEXT: br label [[LOOP2:%.*]] +; CHECK: loop2: +; CHECK-NEXT: br i1 [[ARG:%.*]], label [[LOOP2_LATCH:%.*]], label [[LOOP_LATCH:%.*]] +; CHECK: loop2.latch: +; CHECK-NEXT: store i32 0, ptr null, align 4 +; CHECK-NEXT: br label [[LOOP2]] +; CHECK: loop.latch: +; CHECK-NEXT: store i32 0, ptr null, align 4, !tbaa [[TBAA4]] ; CHECK-NEXT: br label [[LOOP1]] ; entry: diff --git a/llvm/test/Transforms/LICM/pr59324.ll b/llvm/test/Transforms/LICM/pr59324.ll index b0ad60e65069..ec33a0f8ded0 100644 --- a/llvm/test/Transforms/LICM/pr59324.ll +++ b/llvm/test/Transforms/LICM/pr59324.ll @@ -6,7 +6,10 @@ define void @test(ptr %a) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[V:%.*]] = load i32, ptr null, align 4 +; CHECK-NEXT: store ptr null, ptr null, align 8 +; CHECK-NEXT: [[P:%.*]] = load ptr, ptr null, align 8 +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: store i32 [[V]], ptr [[A:%.*]], align 4 ; CHECK-NEXT: br label [[LOOP]] ; entry: @@ -19,3 +22,25 @@ loop: store i32 %v, ptr %a br label %loop } + +define void @test_inttoptr(ptr %a) { +; CHECK-LABEL: @test_inttoptr( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: store ptr null, ptr inttoptr (i64 128 to ptr), align 8 +; CHECK-NEXT: [[P:%.*]] = load ptr, ptr inttoptr (i64 128 to ptr), align 8 +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: store i32 [[V]], ptr [[A:%.*]], align 4 +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: + store ptr null, ptr inttoptr (i64 128 to ptr) + %p = load ptr, ptr inttoptr (i64 128 to ptr) + %v = load i32, ptr %p + store i32 %v, ptr 
%a + br label %loop +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll index f01aaa04606d..a28673cf8e55 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll @@ -429,7 +429,249 @@ exit: ret void } +define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) { +; RV64-LABEL: define void @vector_reverse_irregular_type( +; RV64-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] { +; RV64-NEXT: [[ENTRY:.*]]: +; RV64-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; RV64: [[VECTOR_PH]]: +; RV64-NEXT: br label %[[VECTOR_BODY:.*]] +; RV64: [[VECTOR_BODY]]: +; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] +; RV64-NEXT: [[DEC_IV:%.*]] = add i64 [[OFFSET_IDX]], 0 +; RV64-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1 +; RV64-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2 +; RV64-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3 +; RV64-NEXT: [[IV_NEXT:%.*]] = add nsw i64 [[DEC_IV]], -1 +; RV64-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP1]], -1 +; RV64-NEXT: [[TMP6:%.*]] = add nsw i64 [[TMP2]], -1 +; RV64-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP3]], -1 +; RV64-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[IV_NEXT]] +; RV64-NEXT: [[TMP9:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP5]] +; RV64-NEXT: [[TMP10:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP6]] +; RV64-NEXT: [[TMP11:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP7]] +; RV64-NEXT: [[TMP0:%.*]] = load i7, ptr [[ARRAYIDX_B]], align 1 +; RV64-NEXT: [[TMP13:%.*]] = load i7, ptr [[TMP9]], align 1 +; RV64-NEXT: [[TMP14:%.*]] = load i7, ptr [[TMP10]], align 1 +; RV64-NEXT: [[TMP15:%.*]] = load i7, ptr 
[[TMP11]], align 1 +; RV64-NEXT: [[TMP16:%.*]] = insertelement <4 x i7> poison, i7 [[TMP0]], i32 0 +; RV64-NEXT: [[TMP17:%.*]] = insertelement <4 x i7> [[TMP16]], i7 [[TMP13]], i32 1 +; RV64-NEXT: [[TMP18:%.*]] = insertelement <4 x i7> [[TMP17]], i7 [[TMP14]], i32 2 +; RV64-NEXT: [[TMP19:%.*]] = insertelement <4 x i7> [[TMP18]], i7 [[TMP15]], i32 3 +; RV64-NEXT: [[TMP20:%.*]] = add <4 x i7> [[TMP19]], splat (i7 1) +; RV64-NEXT: [[TMP21:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[IV_NEXT]] +; RV64-NEXT: [[TMP22:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP5]] +; RV64-NEXT: [[TMP23:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP6]] +; RV64-NEXT: [[TMP24:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP7]] +; RV64-NEXT: [[TMP25:%.*]] = extractelement <4 x i7> [[TMP20]], i32 0 +; RV64-NEXT: store i7 [[TMP25]], ptr [[TMP21]], align 1 +; RV64-NEXT: [[TMP26:%.*]] = extractelement <4 x i7> [[TMP20]], i32 1 +; RV64-NEXT: store i7 [[TMP26]], ptr [[TMP22]], align 1 +; RV64-NEXT: [[TMP27:%.*]] = extractelement <4 x i7> [[TMP20]], i32 2 +; RV64-NEXT: store i7 [[TMP27]], ptr [[TMP23]], align 1 +; RV64-NEXT: [[TMP28:%.*]] = extractelement <4 x i7> [[TMP20]], i32 3 +; RV64-NEXT: store i7 [[TMP28]], ptr [[TMP24]], align 1 +; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; RV64-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020 +; RV64-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; RV64: [[MIDDLE_BLOCK]]: +; RV64-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; RV64: [[SCALAR_PH]]: +; RV64-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ] +; RV64-NEXT: br label %[[FOR_BODY:.*]] +; RV64: [[FOR_BODY]]: +; RV64-NEXT: [[DEC_IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], %[[FOR_BODY]] ] +; RV64-NEXT: [[IV_NEXT1]] = add nsw i64 [[DEC_IV1]], -1 +; RV64-NEXT: [[ARRAYIDX_B1:%.*]] = getelementptr inbounds i7, 
ptr [[B]], i64 [[IV_NEXT1]] +; RV64-NEXT: [[TMP30:%.*]] = load i7, ptr [[ARRAYIDX_B1]], align 1 +; RV64-NEXT: [[ADD:%.*]] = add i7 [[TMP30]], 1 +; RV64-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[IV_NEXT1]] +; RV64-NEXT: store i7 [[ADD]], ptr [[ARRAYIDX_A]], align 1 +; RV64-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV1]], 1 +; RV64-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] +; RV64: [[EXIT]]: +; RV64-NEXT: ret void +; +; RV32-LABEL: define void @vector_reverse_irregular_type( +; RV32-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] { +; RV32-NEXT: [[ENTRY:.*]]: +; RV32-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; RV32: [[VECTOR_PH]]: +; RV32-NEXT: br label %[[VECTOR_BODY:.*]] +; RV32: [[VECTOR_BODY]]: +; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] +; RV32-NEXT: [[DEC_IV:%.*]] = add i64 [[OFFSET_IDX]], 0 +; RV32-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1 +; RV32-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2 +; RV32-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3 +; RV32-NEXT: [[IV_NEXT:%.*]] = add nsw i64 [[DEC_IV]], -1 +; RV32-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP1]], -1 +; RV32-NEXT: [[TMP6:%.*]] = add nsw i64 [[TMP2]], -1 +; RV32-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP3]], -1 +; RV32-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[IV_NEXT]] +; RV32-NEXT: [[TMP9:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP5]] +; RV32-NEXT: [[TMP10:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP6]] +; RV32-NEXT: [[TMP11:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP7]] +; RV32-NEXT: [[TMP0:%.*]] = load i7, ptr [[ARRAYIDX_B]], align 1 +; RV32-NEXT: [[TMP13:%.*]] = load i7, ptr [[TMP9]], align 1 +; RV32-NEXT: [[TMP14:%.*]] = load i7, ptr [[TMP10]], align 1 +; RV32-NEXT: [[TMP15:%.*]] = load i7, ptr 
[[TMP11]], align 1 +; RV32-NEXT: [[TMP16:%.*]] = insertelement <4 x i7> poison, i7 [[TMP0]], i32 0 +; RV32-NEXT: [[TMP17:%.*]] = insertelement <4 x i7> [[TMP16]], i7 [[TMP13]], i32 1 +; RV32-NEXT: [[TMP18:%.*]] = insertelement <4 x i7> [[TMP17]], i7 [[TMP14]], i32 2 +; RV32-NEXT: [[TMP19:%.*]] = insertelement <4 x i7> [[TMP18]], i7 [[TMP15]], i32 3 +; RV32-NEXT: [[TMP20:%.*]] = add <4 x i7> [[TMP19]], splat (i7 1) +; RV32-NEXT: [[TMP21:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[IV_NEXT]] +; RV32-NEXT: [[TMP22:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP5]] +; RV32-NEXT: [[TMP23:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP6]] +; RV32-NEXT: [[TMP24:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP7]] +; RV32-NEXT: [[TMP25:%.*]] = extractelement <4 x i7> [[TMP20]], i32 0 +; RV32-NEXT: store i7 [[TMP25]], ptr [[TMP21]], align 1 +; RV32-NEXT: [[TMP26:%.*]] = extractelement <4 x i7> [[TMP20]], i32 1 +; RV32-NEXT: store i7 [[TMP26]], ptr [[TMP22]], align 1 +; RV32-NEXT: [[TMP27:%.*]] = extractelement <4 x i7> [[TMP20]], i32 2 +; RV32-NEXT: store i7 [[TMP27]], ptr [[TMP23]], align 1 +; RV32-NEXT: [[TMP28:%.*]] = extractelement <4 x i7> [[TMP20]], i32 3 +; RV32-NEXT: store i7 [[TMP28]], ptr [[TMP24]], align 1 +; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; RV32-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020 +; RV32-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; RV32: [[MIDDLE_BLOCK]]: +; RV32-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; RV32: [[SCALAR_PH]]: +; RV32-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ] +; RV32-NEXT: br label %[[FOR_BODY:.*]] +; RV32: [[FOR_BODY]]: +; RV32-NEXT: [[DEC_IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], %[[FOR_BODY]] ] +; RV32-NEXT: [[IV_NEXT1]] = add nsw i64 [[DEC_IV1]], -1 +; RV32-NEXT: [[ARRAYIDX_B1:%.*]] = getelementptr inbounds i7, 
ptr [[B]], i64 [[IV_NEXT1]] +; RV32-NEXT: [[TMP30:%.*]] = load i7, ptr [[ARRAYIDX_B1]], align 1 +; RV32-NEXT: [[ADD:%.*]] = add i7 [[TMP30]], 1 +; RV32-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[IV_NEXT1]] +; RV32-NEXT: store i7 [[ADD]], ptr [[ARRAYIDX_A]], align 1 +; RV32-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV1]], 1 +; RV32-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] +; RV32: [[EXIT]]: +; RV32-NEXT: ret void +; +; RV64-UF2-LABEL: define void @vector_reverse_irregular_type( +; RV64-UF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] { +; RV64-UF2-NEXT: [[ENTRY:.*]]: +; RV64-UF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; RV64-UF2: [[VECTOR_PH]]: +; RV64-UF2-NEXT: br label %[[VECTOR_BODY:.*]] +; RV64-UF2: [[VECTOR_BODY]]: +; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; RV64-UF2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] +; RV64-UF2-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 0 +; RV64-UF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1 +; RV64-UF2-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -2 +; RV64-UF2-NEXT: [[TMP24:%.*]] = add i64 [[OFFSET_IDX]], -3 +; RV64-UF2-NEXT: [[TMP25:%.*]] = add i64 [[OFFSET_IDX]], -4 +; RV64-UF2-NEXT: [[TMP42:%.*]] = add i64 [[OFFSET_IDX]], -5 +; RV64-UF2-NEXT: [[TMP43:%.*]] = add i64 [[OFFSET_IDX]], -6 +; RV64-UF2-NEXT: [[TMP50:%.*]] = add i64 [[OFFSET_IDX]], -7 +; RV64-UF2-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP16]], -1 +; RV64-UF2-NEXT: [[TMP2:%.*]] = add nsw i64 [[TMP0]], -1 +; RV64-UF2-NEXT: [[TMP51:%.*]] = add nsw i64 [[TMP17]], -1 +; RV64-UF2-NEXT: [[TMP11:%.*]] = add nsw i64 [[TMP24]], -1 +; RV64-UF2-NEXT: [[TMP59:%.*]] = add nsw i64 [[TMP25]], -1 +; RV64-UF2-NEXT: [[TMP13:%.*]] = add nsw i64 [[TMP42]], -1 +; RV64-UF2-NEXT: [[TMP14:%.*]] = add nsw i64 [[TMP43]], -1 +; RV64-UF2-NEXT: [[TMP15:%.*]] = add nsw i64 [[TMP50]], -1 +; RV64-UF2-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP1]] +; RV64-UF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP2]] +; RV64-UF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP51]] +; RV64-UF2-NEXT: [[TMP19:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP11]] +; RV64-UF2-NEXT: [[TMP20:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP59]] +; RV64-UF2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP13]] +; RV64-UF2-NEXT: [[TMP22:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP14]] +; RV64-UF2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP15]] +; RV64-UF2-NEXT: [[TMP5:%.*]] = load i7, ptr [[TMP3]], align 1 +; RV64-UF2-NEXT: [[TMP6:%.*]] = load i7, ptr [[TMP4]], align 1 +; RV64-UF2-NEXT: [[TMP26:%.*]] = load i7, ptr [[TMP18]], align 1 +; RV64-UF2-NEXT: [[TMP27:%.*]] = load i7, ptr [[TMP19]], align 1 +; RV64-UF2-NEXT: [[TMP28:%.*]] = insertelement <4 x i7> poison, i7 [[TMP5]], i32 0 +; RV64-UF2-NEXT: [[TMP29:%.*]] = insertelement <4 x i7> [[TMP28]], i7 [[TMP6]], i32 1 +; RV64-UF2-NEXT: [[TMP30:%.*]] = insertelement <4 x i7> [[TMP29]], i7 [[TMP26]], i32 2 +; RV64-UF2-NEXT: [[TMP31:%.*]] = insertelement <4 x i7> [[TMP30]], i7 [[TMP27]], i32 3 +; RV64-UF2-NEXT: [[TMP32:%.*]] = load i7, ptr [[TMP20]], align 1 +; RV64-UF2-NEXT: [[TMP33:%.*]] = load i7, ptr [[TMP21]], align 1 +; RV64-UF2-NEXT: [[TMP34:%.*]] = load i7, ptr [[TMP22]], align 1 +; RV64-UF2-NEXT: [[TMP35:%.*]] = load i7, ptr [[TMP23]], align 1 +; RV64-UF2-NEXT: [[TMP36:%.*]] = insertelement <4 x i7> poison, i7 [[TMP32]], i32 0 +; RV64-UF2-NEXT: [[TMP37:%.*]] = insertelement <4 x i7> [[TMP36]], i7 [[TMP33]], i32 1 +; RV64-UF2-NEXT: [[TMP38:%.*]] = insertelement <4 x i7> [[TMP37]], i7 [[TMP34]], i32 2 +; RV64-UF2-NEXT: [[TMP39:%.*]] = insertelement <4 x i7> [[TMP38]], i7 [[TMP35]], i32 3 +; RV64-UF2-NEXT: [[TMP40:%.*]] = add <4 x i7> [[TMP31]], splat (i7 1) +; RV64-UF2-NEXT: [[TMP41:%.*]] = add <4 x 
i7> [[TMP39]], splat (i7 1) +; RV64-UF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP1]] +; RV64-UF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP2]] +; RV64-UF2-NEXT: [[TMP44:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP51]] +; RV64-UF2-NEXT: [[TMP45:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP11]] +; RV64-UF2-NEXT: [[TMP46:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP59]] +; RV64-UF2-NEXT: [[TMP47:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP13]] +; RV64-UF2-NEXT: [[TMP48:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP14]] +; RV64-UF2-NEXT: [[TMP49:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP15]] +; RV64-UF2-NEXT: [[TMP7:%.*]] = extractelement <4 x i7> [[TMP40]], i32 0 +; RV64-UF2-NEXT: store i7 [[TMP7]], ptr [[TMP9]], align 1 +; RV64-UF2-NEXT: [[TMP8:%.*]] = extractelement <4 x i7> [[TMP40]], i32 1 +; RV64-UF2-NEXT: store i7 [[TMP8]], ptr [[TMP10]], align 1 +; RV64-UF2-NEXT: [[TMP52:%.*]] = extractelement <4 x i7> [[TMP40]], i32 2 +; RV64-UF2-NEXT: store i7 [[TMP52]], ptr [[TMP44]], align 1 +; RV64-UF2-NEXT: [[TMP53:%.*]] = extractelement <4 x i7> [[TMP40]], i32 3 +; RV64-UF2-NEXT: store i7 [[TMP53]], ptr [[TMP45]], align 1 +; RV64-UF2-NEXT: [[TMP54:%.*]] = extractelement <4 x i7> [[TMP41]], i32 0 +; RV64-UF2-NEXT: store i7 [[TMP54]], ptr [[TMP46]], align 1 +; RV64-UF2-NEXT: [[TMP55:%.*]] = extractelement <4 x i7> [[TMP41]], i32 1 +; RV64-UF2-NEXT: store i7 [[TMP55]], ptr [[TMP47]], align 1 +; RV64-UF2-NEXT: [[TMP56:%.*]] = extractelement <4 x i7> [[TMP41]], i32 2 +; RV64-UF2-NEXT: store i7 [[TMP56]], ptr [[TMP48]], align 1 +; RV64-UF2-NEXT: [[TMP57:%.*]] = extractelement <4 x i7> [[TMP41]], i32 3 +; RV64-UF2-NEXT: store i7 [[TMP57]], ptr [[TMP49]], align 1 +; RV64-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; RV64-UF2-NEXT: [[TMP58:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1016 +; RV64-UF2-NEXT: br i1 [[TMP58]], label %[[MIDDLE_BLOCK:.*]], label 
%[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; RV64-UF2: [[MIDDLE_BLOCK]]: +; RV64-UF2-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; RV64-UF2: [[SCALAR_PH]]: +; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 7, %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ] +; RV64-UF2-NEXT: br label %[[FOR_BODY:.*]] +; RV64-UF2: [[FOR_BODY]]: +; RV64-UF2-NEXT: [[DEC_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; RV64-UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[DEC_IV]], -1 +; RV64-UF2-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[IV_NEXT]] +; RV64-UF2-NEXT: [[TMP12:%.*]] = load i7, ptr [[ARRAYIDX_B]], align 1 +; RV64-UF2-NEXT: [[ADD:%.*]] = add i7 [[TMP12]], 1 +; RV64-UF2-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[IV_NEXT]] +; RV64-UF2-NEXT: store i7 [[ADD]], ptr [[ARRAYIDX_A]], align 1 +; RV64-UF2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV]], 1 +; RV64-UF2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] +; RV64-UF2: [[EXIT]]: +; RV64-UF2-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %dec.iv = phi i64 [ 1023, %entry ], [ %iv.next, %for.body ] + %iv.next = add nsw i64 %dec.iv, -1 + %arrayidx.b = getelementptr inbounds i7, ptr %B, i64 %iv.next + %0 = load i7, ptr %arrayidx.b, align 1 + %add = add i7 %0, 1 + %arrayidx.a = getelementptr inbounds i7, ptr %A, i64 %iv.next + store i7 %add, ptr %arrayidx.a, align 1 + %cmp = icmp ugt i64 %dec.iv, 1 + br i1 %cmp, label %for.body, label %exit, !llvm.loop !4 + +exit: + ret void +} + !0 = distinct !{!0, !1, !2, !3} !1 = !{!"llvm.loop.vectorize.width", i32 4} !2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} !3 = !{!"llvm.loop.vectorize.enable", i1 true} +!4 = distinct !{!4, !1, !3} diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll index bc0ccfb45c05..02a876a3fda6 100644 --- 
a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll @@ -7,112 +7,86 @@ define void @test(ptr %p, i40 %a) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i40> poison, i40 [[A]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i40> [[BROADCAST_SPLATINSERT1]], <16 x i40> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i40> [[BROADCAST_SPLAT2]], splat (i40 24) -; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i40> [[TMP1]], splat (i40 28) -; CHECK-NEXT: [[TMP3:%.*]] = trunc <16 x i40> [[TMP2]] to <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i1> -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <16 x i1> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <16 x i1> zeroinitializer, [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i1> [[TMP6]], splat (i1 true) -; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <16 x i1> [[TMP7]], zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: store i1 [[TMP10]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] ; CHECK: pred.store.if1: -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: store i1 [[TMP9]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.continue2: ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF3:%.*]], label 
[[PRED_STORE_CONTINUE4:%.*]] ; CHECK: pred.store.if3: -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i1> [[TMP8]], i32 2 -; CHECK-NEXT: store i1 [[TMP12]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] ; CHECK: pred.store.continue4: ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] ; CHECK: pred.store.if5: -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i1> [[TMP8]], i32 3 -; CHECK-NEXT: store i1 [[TMP14]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] ; CHECK: pred.store.if7: -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP8]], i32 4 -; CHECK-NEXT: store i1 [[TMP16]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] ; CHECK: pred.store.if9: -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i1> [[TMP8]], i32 5 -; CHECK-NEXT: store i1 [[TMP18]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]] ; CHECK: pred.store.continue10: ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] ; CHECK: pred.store.if11: -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i1> [[TMP8]], i32 6 -; CHECK-NEXT: store i1 [[TMP20]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] ; CHECK: pred.store.continue12: ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]] ; CHECK: pred.store.if13: -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i1> [[TMP8]], i32 7 -; 
CHECK-NEXT: store i1 [[TMP22]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]] ; CHECK: pred.store.continue14: ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]] ; CHECK: pred.store.if15: -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP8]], i32 8 -; CHECK-NEXT: store i1 [[TMP24]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]] ; CHECK: pred.store.continue16: ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]] ; CHECK: pred.store.if17: -; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i1> [[TMP8]], i32 9 -; CHECK-NEXT: store i1 [[TMP26]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]] ; CHECK: pred.store.continue18: ; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]] ; CHECK: pred.store.if19: -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[TMP8]], i32 10 -; CHECK-NEXT: store i1 [[TMP28]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]] ; CHECK: pred.store.continue20: ; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]] ; CHECK: pred.store.if21: -; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i1> [[TMP8]], i32 11 -; CHECK-NEXT: store i1 [[TMP30]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]] ; CHECK: pred.store.continue22: ; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]] ; CHECK: pred.store.if23: -; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP8]], i32 12 -; CHECK-NEXT: store i1 [[TMP32]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; 
CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]] ; CHECK: pred.store.continue24: ; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]] ; CHECK: pred.store.if25: -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i1> [[TMP8]], i32 13 -; CHECK-NEXT: store i1 [[TMP34]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]] ; CHECK: pred.store.continue26: ; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]] ; CHECK: pred.store.if27: -; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i1> [[TMP8]], i32 14 -; CHECK-NEXT: store i1 [[TMP36]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]] ; CHECK: pred.store.continue28: ; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]] ; CHECK: pred.store.if29: -; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i1> [[TMP8]], i32 15 -; CHECK-NEXT: store i1 [[TMP38]], ptr [[P]], align 1 +; CHECK-NEXT: store i1 false, ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]] ; CHECK: pred.store.continue30: ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll index 83e2f84814ad..7d9ed7d6215c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll @@ -58,3 +58,212 @@ bb2: bb3: ret void } + +; Test case for https://github.com/llvm/llvm-project/issues/131359. 
+define void @redundant_or_1(ptr %dst, i1 %c.0, i1 %c.1) { +; CHECK-LABEL: @redundant_or_1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_1:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = select i1 true, <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 2) +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP0]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 +; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK: pred.store.if: +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP7]] +; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] +; CHECK: pred.store.continue: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 +; 
CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] +; CHECK: pred.store.if3: +; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP10]] +; CHECK-NEXT: store i32 0, ptr [[TMP11]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] +; CHECK: pred.store.continue4: +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 +; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] +; CHECK: pred.store.if5: +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP13]] +; CHECK-NEXT: store i32 0, ptr [[TMP14]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] +; CHECK: pred.store.continue6: +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 +; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]] +; CHECK: pred.store.if7: +; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 3 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP16]] +; CHECK-NEXT: store i32 0, ptr [[TMP17]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] +; CHECK: pred.store.continue8: +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] +; CHECK: then.1: +; CHECK-NEXT: [[CMP:%.*]] 
= icmp eq i32 [[IV]], 2 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP]], true +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_0]], i1 false +; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]] +; CHECK: then.2: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] +; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4 +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 3 +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + br i1 %c.0, label %loop.latch, label %then.1 + +then.1: + %cmp = icmp eq i32 %iv, 2 + %or = or i1 %cmp, true + %cond = select i1 %or, i1 %c.1, i1 false + br i1 %cond, label %then.2, label %loop.latch + +then.2: + %gep = getelementptr inbounds i32, ptr %dst, i32 %iv + store i32 0, ptr %gep, align 4 + br label %loop.latch + +loop.latch: + %iv.next = add nuw nsw i32 %iv, 1 + %ec = icmp eq i32 %iv.next, 3 + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} + +define void @redundant_or_2(ptr %dst, i1 %c.0, i1 %c.1) { +; CHECK-LABEL: @redundant_or_2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_1:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> 
[[BROADCAST_SPLAT2]], splat (i1 true) +; CHECK-NEXT: [[TMP1:%.*]] = select i1 true, <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 2) +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP0]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK: pred.store.if: +; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP6]] +; CHECK-NEXT: store i32 0, ptr [[TMP7]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] +; CHECK: pred.store.continue: +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1 +; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] +; CHECK: pred.store.if3: +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP9]] +; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] +; CHECK: pred.store.continue4: +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2 +; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] +; CHECK: pred.store.if5: +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP12]] +; CHECK-NEXT: 
store i32 0, ptr [[TMP13]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] +; CHECK: pred.store.continue6: +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3 +; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]] +; CHECK: pred.store.if7: +; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[INDEX]], 3 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP15]] +; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] +; CHECK: pred.store.continue8: +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] +; CHECK: then.1: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2 +; CHECK-NEXT: [[OR:%.*]] = or i1 true, [[CMP]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1]], i1 false +; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]] +; CHECK: then.2: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] +; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4 +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 3 +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: + %iv = phi 
i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + br i1 %c.0, label %loop.latch, label %then.1 + +then.1: + %cmp = icmp eq i32 %iv, 2 + %or = or i1 true, %cmp + %cond = select i1 %or, i1 %c.1, i1 false + br i1 %cond, label %then.2, label %loop.latch + +then.2: + %gep = getelementptr inbounds i32, ptr %dst, i32 %iv + store i32 0, ptr %gep, align 4 + br label %loop.latch + +loop.latch: + %iv.next = add nuw nsw i32 %iv, 1 + %ec = icmp eq i32 %iv.next, 3 + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} diff --git a/llvm/test/Transforms/ObjCARC/contract.ll b/llvm/test/Transforms/ObjCARC/contract.ll index 70bd57a0c719..24f9a712ccd0 100644 --- a/llvm/test/Transforms/ObjCARC/contract.ll +++ b/llvm/test/Transforms/ObjCARC/contract.ll @@ -234,6 +234,22 @@ define void @test14(ptr %a, ptr %b) { ret void } +define void @test15(ptr %x) { +; CHECK-LABEL: define void @test15( +; CHECK-SAME: ptr [[X:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds ptr, ptr [[X]], i32 0 +; CHECK-NEXT: [[V0:%.*]] = call ptr @llvm.objc.retain(ptr [[Y]]) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: call void @use_pointer(ptr [[V0]]) +; CHECK-NEXT: call void @use_pointer(ptr [[V0]]) +; CHECK-NEXT: ret void +; + %y = getelementptr inbounds ptr, ptr %x, i32 0 + %v0 = call ptr @llvm.objc.retain(ptr %y) nounwind + call void @use_pointer(ptr %x) + call void @use_pointer(ptr %y) + ret void +} + declare void @llvm.objc.clang.arc.use(...) nounwind declare void @llvm.objc.clang.arc.noop.use(...) 
nounwind diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll index ffbacc1a8903..95bf296af9b0 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll @@ -1028,15 +1028,14 @@ define i32 @nodefaultwithholes(i32 %c) { ; CHECK-LABEL: @nodefaultwithholes( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[C:%.*]], 6 -; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_HOLE_CHECK:%.*]], label [[SW_DEFAULT:%.*]] -; CHECK: sw.default: -; CHECK-NEXT: call void @exit(i32 1) -; CHECK-NEXT: unreachable -; CHECK: switch.hole_check: ; CHECK-NEXT: [[SWITCH_MASKINDEX:%.*]] = trunc i32 [[C]] to i8 ; CHECK-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i8 47, [[SWITCH_MASKINDEX]] ; CHECK-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1 -; CHECK-NEXT: br i1 [[SWITCH_LOBIT]], label [[SWITCH_LOOKUP:%.*]], label [[SW_DEFAULT]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP0]], i1 [[SWITCH_LOBIT]], i1 false +; CHECK-NEXT: br i1 [[OR_COND]], label [[SWITCH_LOOKUP:%.*]], label [[SW_DEFAULT:%.*]] +; CHECK: sw.default: +; CHECK-NEXT: call void @exit(i32 1) +; CHECK-NEXT: unreachable ; CHECK: switch.lookup: ; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [6 x i32], ptr @switch.table.nodefaultwithholes, i32 0, i32 [[C]] ; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4 diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll index 4ebf09ae3b12..fd6b21a7f9e6 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll @@ -143,17 +143,16 @@ define i32 @reachable_default_holes_0to31(i32 %x, i32 %y) { ; CHECK-LABEL: @reachable_default_holes_0to31( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = 
icmp ult i32 [[X:%.*]], 32 -; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_HOLE_CHECK:%.*]], label [[RETURN:%.*]] -; CHECK: switch.hole_check: ; CHECK-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i32 -277094665, [[X]] ; CHECK-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i32 [[SWITCH_SHIFTED]] to i1 -; CHECK-NEXT: br i1 [[SWITCH_LOBIT]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP0]], i1 [[SWITCH_LOBIT]], i1 false +; CHECK-NEXT: br i1 [[OR_COND]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] ; CHECK: switch.lookup: ; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [32 x i32], ptr @switch.table.reachable_default_holes_0to31, i32 0, i32 [[X]] ; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4 ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: -; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ [[Y:%.*]], [[SWITCH_HOLE_CHECK]] ], [ [[Y]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ [[Y:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i32 [[RES]] ; entry: diff --git a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll index 37f48a9a7e03..7b88ec338cf5 100644 --- a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll +++ b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll @@ -88,10 +88,9 @@ define void @one_pred_trunc_cond(i8 %v0, i8 %v1) { ; CHECK-LABEL: @one_pred_trunc_cond( ; CHECK-NEXT: pred: ; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0 -; CHECK-NEXT: br i1 [[C0]], label [[DISPATCH:%.*]], label [[FINAL_RIGHT:%.*]] -; CHECK: dispatch: ; CHECK-NEXT: [[C1:%.*]] = trunc i8 [[V1:%.*]] to i1 -; CHECK-NEXT: br i1 [[C1]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false +; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT:%.*]] ; CHECK: 
common.ret: ; CHECK-NEXT: ret void ; CHECK: final_left: diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp index 09356191345f..7a5fd83cd958 100644 --- a/llvm/unittests/ADT/APFloatTest.cpp +++ b/llvm/unittests/ADT/APFloatTest.cpp @@ -8347,4 +8347,13 @@ TEST(APFloatTest, AddOrSubtractSignificand) { Helper::runTest(true, false, 3, 0x10001, false, 7, 0x100, false, 6, 0x1e00, lfLessThanHalf); } + +TEST(APFloatTest, hasSignBitInMSB) { + EXPECT_TRUE(APFloat::hasSignBitInMSB(APFloat::IEEEsingle())); + EXPECT_TRUE(APFloat::hasSignBitInMSB(APFloat::x87DoubleExtended())); + EXPECT_TRUE(APFloat::hasSignBitInMSB(APFloat::PPCDoubleDouble())); + EXPECT_TRUE(APFloat::hasSignBitInMSB(APFloat::IEEEquad())); + EXPECT_FALSE(APFloat::hasSignBitInMSB(APFloat::Float8E8M0FNU())); +} + } // namespace diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp index 24822c847046..c954163cdeb3 100644 --- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp +++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp @@ -110,6 +110,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/StringMatcher.h" diff --git a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h index 35a9abdc37c8..8da6fbef0672 100644 --- a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h +++ b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h @@ -15,15 +15,14 @@ #define LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/Main.h" #include <algorithm> #include <cassert> #include <functional> #include <map> namespace llvm { -extern cl::opt<bool> EmitLongStrLiterals; inline void printChar(raw_ostream 
&OS, char C) { unsigned char UC(C); diff --git a/llvm/utils/TableGen/Basic/TableGen.cpp b/llvm/utils/TableGen/Basic/TableGen.cpp index 80ac93f2b54f..edb779150069 100644 --- a/llvm/utils/TableGen/Basic/TableGen.cpp +++ b/llvm/utils/TableGen/Basic/TableGen.cpp @@ -26,15 +26,6 @@ using namespace llvm; -namespace llvm { -cl::opt<bool> EmitLongStrLiterals( - "long-string-literals", - cl::desc("when emitting large string tables, prefer string literals over " - "comma-separated char literals. This can be a readability and " - "compile-time performance win, but upsets some compilers"), - cl::Hidden, cl::init(true)); -} // end namespace llvm - static cl::OptionCategory PrintEnumsCat("Options for -print-enums"); static cl::opt<std::string> Class("class", cl::desc("Print Enum list for this class"), diff --git a/llvm/utils/TableGen/SDNodeInfoEmitter.cpp b/llvm/utils/TableGen/SDNodeInfoEmitter.cpp index 63ee0deb8711..64f03dae83e7 100644 --- a/llvm/utils/TableGen/SDNodeInfoEmitter.cpp +++ b/llvm/utils/TableGen/SDNodeInfoEmitter.cpp @@ -9,6 +9,7 @@ #include "Basic/SequenceToOffsetTable.h" #include "Common/CodeGenDAGPatterns.h" // For SDNodeInfo. 
#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/StringToOffsetTable.h" #include "llvm/TableGen/TableGenBackend.h" diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/test/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/test/BUILD.gn index 9d42409f1973..d40ce6424fe8 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/test/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/test/BUILD.gn @@ -35,6 +35,7 @@ write_lit_config("lit_site_cfg") { rebase_path("//clang-tools-extra/clangd/test"), "CURRENT_TOOLS_DIR=", + "CLANGD_BUILD_DEXP=1", "CLANGD_ENABLE_REMOTE=0", "CLANGD_TIDY_CHECKS=1", "LLVM_HOST_TRIPLE=$llvm_current_triple", diff --git a/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn index d90df7bc0e57..b40fdf154b01 100644 --- a/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn @@ -10,6 +10,7 @@ static_library("TableGen") { "Record.cpp", "SetTheory.cpp", "StringMatcher.cpp", + "StringToOffsetTable.cpp", "TGLexer.cpp", "TGParser.cpp", "TGTimer.cpp", diff --git a/mlir/docs/DefiningDialects/Operations.md b/mlir/docs/DefiningDialects/Operations.md index fafda816a388..88d58e0a1efb 100644 --- a/mlir/docs/DefiningDialects/Operations.md +++ b/mlir/docs/DefiningDialects/Operations.md @@ -1756,6 +1756,23 @@ that it has a value within the valid range of the enum. If their wrapper attribute instead of using a bare signless integer attribute for storage. +### Enum properties + +Enums can be wrapped in properties so that they can be stored inline. +This causes a value of the enum's C++ class to become a member of the operation's +property struct and for the operation's verifier to check that the enum's value +is a valid value for the enum. + +The basic wrapper is `EnumProp`, which simply takes an `EnumInfo`. 
+ +A less ambiguous syntax, namely putting a mnemonic and `<>`s surrounding +the enum is generated with `NamedEnumProp`, which takes a `*EnumInfo` +and a mnemonic string, which becomes part of the property's syntax. + +Both of these `EnumProp` types have a `*EnumPropWithAttrForm`, which allows for +transparently upgrading from `EnumAttr`s and optionally retaining those +attributes in the generic form. + ## Debugging Tips ### Run `mlir-tblgen` to see the generated content diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td index a9de78780645..34a30a00790e 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td @@ -485,17 +485,16 @@ def DISubprogramFlags : I32BitEnumAttr< // IntegerOverflowFlags //===----------------------------------------------------------------------===// -def IOFnone : I32BitEnumAttrCaseNone<"none">; -def IOFnsw : I32BitEnumAttrCaseBit<"nsw", 0>; -def IOFnuw : I32BitEnumAttrCaseBit<"nuw", 1>; +def IOFnone : I32BitEnumCaseNone<"none">; +def IOFnsw : I32BitEnumCaseBit<"nsw", 0>; +def IOFnuw : I32BitEnumCaseBit<"nuw", 1>; -def IntegerOverflowFlags : I32BitEnumAttr< +def IntegerOverflowFlags : I32BitEnum< "IntegerOverflowFlags", "LLVM integer overflow flags", [IOFnone, IOFnsw, IOFnuw]> { let separator = ", "; let cppNamespace = "::mlir::LLVM"; - let genSpecializedAttr = 0; let printBitEnumPrimaryGroups = 1; } @@ -504,6 +503,11 @@ def LLVM_IntegerOverflowFlagsAttr : let assemblyFormat = "`<` $value `>`"; } +def LLVM_IntegerOverflowFlagsProp : + NamedEnumPropWithAttrForm<IntegerOverflowFlags, "overflow", LLVM_IntegerOverflowFlagsAttr> { + let defaultValue = enum.cppType # "::" # "none"; +} + //===----------------------------------------------------------------------===// // FastmathFlags //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td 
b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index 7ffa880dc8da..e89e78aec714 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -60,7 +60,7 @@ class LLVM_IntArithmeticOpWithOverflowFlag<string mnemonic, string instName, list<Trait> traits = []> : LLVM_ArithmeticOpBase<AnySignlessInteger, mnemonic, instName, !listconcat([DeclareOpInterfaceMethods<IntegerOverflowFlagsInterface>], traits)> { - dag iofArg = (ins EnumProp<"IntegerOverflowFlags", "", "IntegerOverflowFlags::none">:$overflowFlags); + dag iofArg = (ins LLVM_IntegerOverflowFlagsProp:$overflowFlags); let arguments = !con(commonArgs, iofArg); string mlirBuilder = [{ @@ -69,7 +69,7 @@ class LLVM_IntArithmeticOpWithOverflowFlag<string mnemonic, string instName, $res = op; }]; let assemblyFormat = [{ - $lhs `,` $rhs `` custom<OverflowFlags>($overflowFlags) attr-dict `:` type($res) + $lhs `,` $rhs ($overflowFlags^)? attr-dict `:` type($res) }]; string llvmBuilder = "$res = builder.Create" # instName # @@ -563,10 +563,10 @@ class LLVM_CastOpWithOverflowFlag<string mnemonic, string instName, Type type, Type resultType, list<Trait> traits = []> : LLVM_Op<mnemonic, !listconcat([Pure], [DeclareOpInterfaceMethods<IntegerOverflowFlagsInterface>], traits)>, LLVM_Builder<"$res = builder.Create" # instName # "($arg, $_resultType, /*Name=*/\"\", op.hasNoUnsignedWrap(), op.hasNoSignedWrap());"> { - let arguments = (ins type:$arg, EnumProp<"IntegerOverflowFlags", "", "IntegerOverflowFlags::none">:$overflowFlags); + let arguments = (ins type:$arg, LLVM_IntegerOverflowFlagsProp:$overflowFlags); let results = (outs resultType:$res); let builders = [LLVM_OneResultOpBuilder]; - let assemblyFormat = "$arg `` custom<OverflowFlags>($overflowFlags) attr-dict `:` type($arg) `to` type($res)"; + let assemblyFormat = "$arg ($overflowFlags^)? 
attr-dict `:` type($arg) `to` type($res)"; string llvmInstName = instName; string mlirBuilder = [{ auto op = $_builder.create<$_qualCppClassName>( diff --git a/mlir/include/mlir/IR/EnumAttr.td b/mlir/include/mlir/IR/EnumAttr.td index 931126a155fb..3f7f747ac20d 100644 --- a/mlir/include/mlir/IR/EnumAttr.td +++ b/mlir/include/mlir/IR/EnumAttr.td @@ -10,6 +10,7 @@ #define ENUMATTR_TD include "mlir/IR/AttrTypeBase.td" +include "mlir/IR/Properties.td" //===----------------------------------------------------------------------===// // Enum attribute kinds @@ -552,6 +553,141 @@ class EnumAttr<Dialect dialect, EnumInfo enumInfo, string name = "", let assemblyFormat = "$value"; } +// A property wrapping by a C++ enum. This class will automatically create bytecode +// serialization logic for the given enum, as well as arranging for parser and +// printer calls. +class EnumProp<EnumInfo enumInfo> : Property<enumInfo.cppType, enumInfo.summary> { + EnumInfo enum = enumInfo; + + let description = enum.description; + let predicate = !if( + !isa<BitEnumBase>(enum), + CPred<"(static_cast<" # enum.underlyingType # ">($_self) & ~" # !cast<BitEnumBase>(enum).validBits # ") == 0">, + Or<!foreach(case, enum.enumerants, CPred<"$_self == " # enum.cppType # "::" # case.symbol>)>); + + let convertFromAttribute = [{ + auto intAttr = ::mlir::dyn_cast_if_present<::mlir::IntegerAttr>($_attr); + if (!intAttr) { + return $_diag() << "expected IntegerAttr storage for }] # + enum.cppType # [{"; + } + $_storage = static_cast<}] # enum.cppType # [{>(intAttr.getValue().getZExtValue()); + return ::mlir::success(); + }]; + + let convertToAttribute = [{ + return ::mlir::IntegerAttr::get(::mlir::IntegerType::get($_ctxt, }] # enum.bitwidth + # [{), static_cast<}] # enum.underlyingType #[{>($_storage)); + }]; + + let writeToMlirBytecode = [{ + $_writer.writeVarInt(static_cast<uint64_t>($_storage)); + }]; + + let readFromMlirBytecode = [{ + uint64_t rawValue; + if 
(::mlir::failed($_reader.readVarInt(rawValue))) + return ::mlir::failure(); + if (rawValue > std::numeric_limits<}] # enum.underlyingType # [{>::max()) + return ::mlir::failure(); + $_storage = static_cast<}] # enum.cppType # [{>(rawValue); + }]; + + let optionalParser = [{ + auto value = ::mlir::FieldParser<std::optional<}] # enum.cppType # [{>>::parse($_parser); + if (::mlir::failed(value)) + return ::mlir::failure(); + if (!(value->has_value())) + return std::nullopt; + $_storage = std::move(**value); + }]; +} + +// Enum property that can have been (or, if `storeInCustomAttribute` is true, will also +// be stored as) an attribute, in addition to being stored as an integer attribute. +class EnumPropWithAttrForm<EnumInfo enumInfo, Attr attributeForm> + : EnumProp<enumInfo> { + Attr attrForm = attributeForm; + bit storeInCustomAttribute = 0; + + let convertFromAttribute = [{ + auto customAttr = ::mlir::dyn_cast_if_present<}] + # attrForm.storageType # [{>($_attr); + if (customAttr) { + $_storage = customAttr.getValue(); + return ::mlir::success(); + } + auto intAttr = ::mlir::dyn_cast_if_present<::mlir::IntegerAttr>($_attr); + if (!intAttr) { + return $_diag() << "expected }] # attrForm.storageType + # [{ or IntegerAttr storage for }] # enum.cppType # [{"; + } + $_storage = static_cast<}] # enum.cppType # [{>(intAttr.getValue().getZExtValue()); + return ::mlir::success(); + }]; + + let convertToAttribute = !if(storeInCustomAttribute, [{ + return }] # attrForm.storageType # [{::get($_ctxt, $_storage); + }], [{ + return ::mlir::IntegerAttr::get(::mlir::IntegerType::get($_ctxt, }] # enumInfo.bitwidth + # [{), static_cast<}] # enum.underlyingType #[{>($_storage)); + }]); +} + +class _namedEnumPropFields<string cppType, string mnemonic> { + code parser = [{ + if ($_parser.parseKeyword("}] # mnemonic # [{") + || $_parser.parseLess()) { + return ::mlir::failure(); + } + auto parseRes = ::mlir::FieldParser<}] # cppType # [{>::parse($_parser); + if (::mlir::failed(parseRes) 
|| + ::mlir::failed($_parser.parseGreater())) { + return ::mlir::failure(); + } + $_storage = *parseRes; + }]; + + code optionalParser = [{ + if ($_parser.parseOptionalKeyword("}] # mnemonic # [{")) { + return std::nullopt; + } + if ($_parser.parseLess()) { + return ::mlir::failure(); + } + auto parseRes = ::mlir::FieldParser<}] # cppType # [{>::parse($_parser); + if (::mlir::failed(parseRes) || + ::mlir::failed($_parser.parseGreater())) { + return ::mlir::failure(); + } + $_storage = *parseRes; + }]; + + code printer = [{ + $_printer << "}] # mnemonic # [{<" << $_storage << ">"; + }]; +} + +// An EnumProp which, when printed, is surrounded by mnemonic<>. +// For example, if the enum can be a, b, or c, and the mnemonic is foo, +// the format of this property will be "foo<a>", "foo<b>", or "foo<c>". +class NamedEnumProp<EnumInfo enumInfo, string name> + : EnumProp<enumInfo> { + string mnemonic = name; + let parser = _namedEnumPropFields<enum.cppType, mnemonic>.parser; + let optionalParser = _namedEnumPropFields<enum.cppType, mnemonic>.optionalParser; + let printer = _namedEnumPropFields<enum.cppType, mnemonic>.printer; +} + +// A `NamedEnumProp` with an attribute form as in `EnumPropWithAttrForm`. 
+class NamedEnumPropWithAttrForm<EnumInfo enumInfo, string name, Attr attributeForm> + : EnumPropWithAttrForm<enumInfo, attributeForm> { + string mnemonic = name; + let parser = _namedEnumPropFields<enum.cppType, mnemonic>.parser; + let optionalParser = _namedEnumPropFields<enum.cppType, mnemonic>.optionalParser; + let printer = _namedEnumPropFields<enumInfo.cppType, mnemonic>.printer; +} + class _symbolToValue<EnumInfo enumInfo, string case> { defvar cases = !filter(iter, enumInfo.enumerants, !eq(iter.str, case)); diff --git a/mlir/include/mlir/IR/Properties.td b/mlir/include/mlir/IR/Properties.td index 8bd834379040..739df03c7ef2 100644 --- a/mlir/include/mlir/IR/Properties.td +++ b/mlir/include/mlir/IR/Properties.td @@ -239,25 +239,6 @@ def I64Prop : IntProp<"int64_t">; def I32Property : IntProp<"int32_t">, Deprecated<"moved to shorter name I32Prop">; def I64Property : IntProp<"int64_t">, Deprecated<"moved to shorter name I64Prop">; -class EnumProp<string storageTypeParam, string desc = "", string default = ""> : - Property<storageTypeParam, desc> { - // TODO: implement predicate for enum validity. 
- let writeToMlirBytecode = [{ - $_writer.writeVarInt(static_cast<uint64_t>($_storage)); - }]; - let readFromMlirBytecode = [{ - uint64_t val; - if (failed($_reader.readVarInt(val))) - return ::mlir::failure(); - $_storage = static_cast<}] # storageTypeParam # [{>(val); - }]; - let defaultValue = default; -} - -class EnumProperty<string storageTypeParam, string desc = "", string default = ""> : - EnumProp<storageTypeParam, desc, default>, - Deprecated<"moved to shorter name EnumProp">; - // Note: only a class so we can deprecate the old name class _cls_StringProp : Property<"std::string", "string"> { let interfaceType = "::llvm::StringRef"; diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index 4891dab3aa1d..c6c695b442b4 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -136,9 +136,13 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> { matchAndRewrite(gpu::ShuffleOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { Location loc = op->getLoc(); + Value initShflValue = adaptor.getValue(); + Type shflType = initShflValue.getType(); // TODO: Add support for non 32-bit shuffle values. 
- if (adaptor.getValue().getType().getIntOrFloatBitWidth() != 32) - return failure(); + if (!shflType.isIntOrFloat() || shflType.getIntOrFloatBitWidth() != 32) + return rewriter.notifyMatchFailure( + op, "only 32-bit int/float types are supported"); + const unsigned indexBitwidth = getTypeConverter()->getIndexTypeBitwidth(); Value srcLaneId = getLaneId(rewriter, loc, indexBitwidth); @@ -175,16 +179,14 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> { Value two = rewriter.create<LLVM::ConstantOp>(loc, int32Type, 2); Value dwordAlignedDstLane = rewriter.create<LLVM::ShlOp>(loc, int32Type, selectDstLane, two); - Value initShflValue = adaptor.getValue(); - if (adaptor.getValue().getType().isF32()) { + if (shflType.isF32()) { initShflValue = rewriter.create<LLVM::BitcastOp>(loc, int32Type, initShflValue); } Value shflValue = rewriter.create<ROCDL::DsBpermuteOp>( loc, int32Type, dwordAlignedDstLane, initShflValue); - if (adaptor.getValue().getType().isF32()) { - shflValue = rewriter.create<LLVM::BitcastOp>( - loc, adaptor.getValue().getType(), shflValue); + if (shflType.isF32()) { + shflValue = rewriter.create<LLVM::BitcastOp>(loc, shflType, shflValue); } rewriter.replaceOp(op, {shflValue, isActiveSrcLane}); return success(); diff --git a/mlir/lib/Dialect/GPU/Transforms/ShuffleRewriter.cpp b/mlir/lib/Dialect/GPU/Transforms/ShuffleRewriter.cpp index 4bd4da25f6e5..9f2900214e8b 100644 --- a/mlir/lib/Dialect/GPU/Transforms/ShuffleRewriter.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/ShuffleRewriter.cpp @@ -40,8 +40,9 @@ struct GpuShuffleRewriter : public OpRewritePattern<gpu::ShuffleOp> { auto i64 = rewriter.getI64Type(); // If the type of the value is either i32 or f32, the op is already valid. 
- if (valueType.getIntOrFloatBitWidth() == 32) - return failure(); + if (!valueType.isIntOrFloat() || valueType.getIntOrFloatBitWidth() != 64) + return rewriter.notifyMatchFailure( + op, "only 64-bit int/float types are supported"); Value lo, hi; diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 28d2019b225c..d1d00ca9681e 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -50,71 +50,6 @@ using mlir::LLVM::tailcallkind::getMaxEnumValForTailCallKind; #include "mlir/Dialect/LLVMIR/LLVMOpsDialect.cpp.inc" //===----------------------------------------------------------------------===// -// Property Helpers -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// IntegerOverflowFlags -//===----------------------------------------------------------------------===// - -namespace mlir { -static Attribute convertToAttribute(MLIRContext *ctx, - IntegerOverflowFlags flags) { - return IntegerOverflowFlagsAttr::get(ctx, flags); -} - -static LogicalResult -convertFromAttribute(IntegerOverflowFlags &flags, Attribute attr, - function_ref<InFlightDiagnostic()> emitError) { - auto flagsAttr = dyn_cast<IntegerOverflowFlagsAttr>(attr); - if (!flagsAttr) { - return emitError() << "expected 'overflowFlags' attribute to be an " - "IntegerOverflowFlagsAttr, but got " - << attr; - } - flags = flagsAttr.getValue(); - return success(); -} -} // namespace mlir - -static ParseResult parseOverflowFlags(AsmParser &p, - IntegerOverflowFlags &flags) { - if (failed(p.parseOptionalKeyword("overflow"))) { - flags = IntegerOverflowFlags::none; - return success(); - } - if (p.parseLess()) - return failure(); - do { - StringRef kw; - SMLoc loc = p.getCurrentLocation(); - if (p.parseKeyword(&kw)) - return failure(); - std::optional<IntegerOverflowFlags> flag = - 
symbolizeIntegerOverflowFlags(kw); - if (!flag) - return p.emitError(loc, - "invalid overflow flag: expected nsw, nuw, or none"); - flags = flags | *flag; - } while (succeeded(p.parseOptionalComma())); - return p.parseGreater(); -} - -static void printOverflowFlags(AsmPrinter &p, Operation *op, - IntegerOverflowFlags flags) { - if (flags == IntegerOverflowFlags::none) - return; - p << " overflow<"; - SmallVector<StringRef, 2> strs; - if (bitEnumContainsAny(flags, IntegerOverflowFlags::nsw)) - strs.push_back("nsw"); - if (bitEnumContainsAny(flags, IntegerOverflowFlags::nuw)) - strs.push_back("nuw"); - llvm::interleaveComma(strs, p); - p << ">"; -} - -//===----------------------------------------------------------------------===// // Attribute Helpers //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp index c4ef7d0bb9ff..47368532df16 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp @@ -731,9 +731,56 @@ struct ConcatSliceOptimization : public OpRewritePattern<tosa::SliceOp> { } }; +// Update size operand of tosa.slice if size has dynamic dims but corresponding +// output dim is static +struct SliceDynamicSizeCanonicalization + : public OpRewritePattern<tosa::SliceOp> { + using OpRewritePattern<tosa::SliceOp>::OpRewritePattern; + + LogicalResult matchAndRewrite(tosa::SliceOp sliceOp, + PatternRewriter &rewriter) const override { + ShapedType resultType = cast<ShapedType>(sliceOp.getType()); + + ElementsAttr sizeElems; + if (!matchPattern(sliceOp.getSize(), m_Constant(&sizeElems))) { + return rewriter.notifyMatchFailure( + sliceOp, "size of slice must be a static ranked shape"); + } + + llvm::SmallVector<int64_t> sliceSizes = + llvm::to_vector(sizeElems.getValues<int64_t>()); + + bool replaceSliceSize{false}; + // if size op has -1 indicating dynamic 
shape but corresponding dim on the + // output is statically known, update size to match with known output dim + // shape + for (const auto &[index, size] : llvm::enumerate(sliceSizes)) { + if (size == -1 && !resultType.isDynamicDim(index)) { + sliceSizes[index] = resultType.getDimSize(index); + replaceSliceSize = true; + } + } + + if (!replaceSliceSize) { + return rewriter.notifyMatchFailure( + sliceOp, "no dimension of size of slice is dynamic that resolves " + "to static output shape"); + } + + auto size_op = getTosaConstShape(rewriter, sliceOp.getLoc(), sliceSizes); + auto newSliceOp = rewriter.create<tosa::SliceOp>( + sliceOp.getLoc(), sliceOp.getType(), sliceOp.getInput1(), + sliceOp.getStart(), size_op); + + rewriter.replaceOp(sliceOp, newSliceOp.getResult()); + return success(); + } +}; + void SliceOp::getCanonicalizationPatterns(RewritePatternSet &results, MLIRContext *context) { - results.add<ConcatSliceOptimization>(context); + results.add<ConcatSliceOptimization, SliceDynamicSizeCanonicalization>( + context); } //===----------------------------------------------------------------------===// diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-unsupported.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-unsupported.mlir new file mode 100644 index 000000000000..90f2e5f047cd --- /dev/null +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-unsupported.mlir @@ -0,0 +1,13 @@ +// RUN: mlir-opt %s -convert-gpu-to-rocdl -verify-diagnostics + +gpu.module @test_module { + // ROCDL lowering only suport shuffles for 32bit ints/floats, but they + // shouldn't crash on unsupported types. 
+ func.func @gpu_shuffle_unsupported(%arg0 : vector<4xf16>) -> vector<4xf16> { + %offset = arith.constant 4 : i32 + %width = arith.constant 64 : i32 + // expected-error @+1 {{failed to legalize operation 'gpu.shuffle'}} + %shfl, %pred = gpu.shuffle xor %arg0, %offset, %width : vector<4xf16> + return %shfl : vector<4xf16> + } +} diff --git a/mlir/test/Dialect/GPU/shuffle-rewrite.mlir b/mlir/test/Dialect/GPU/shuffle-rewrite.mlir index 461825820153..c0ccae05a057 100644 --- a/mlir/test/Dialect/GPU/shuffle-rewrite.mlir +++ b/mlir/test/Dialect/GPU/shuffle-rewrite.mlir @@ -49,3 +49,14 @@ module { return } } + +// ----- + +// CHECK-LABEL: @gpu_shuffle_unsupported +func.func @gpu_shuffle_unsupported(%arg0 : vector<4xf16>) -> vector<4xf16> { + %offset = arith.constant 4 : i32 + %width = arith.constant 64 : i32 + // CHECK: gpu.shuffle xor %{{.*}}, %{{.*}}, %{{.*}} : vector<4xf16> + %shfl, %pred = gpu.shuffle xor %arg0, %offset, %width : vector<4xf16> + return %shfl : vector<4xf16> +} diff --git a/mlir/test/Dialect/Tosa/canonicalize.mlir b/mlir/test/Dialect/Tosa/canonicalize.mlir index b366b4f1e4fd..d153474593d8 100644 --- a/mlir/test/Dialect/Tosa/canonicalize.mlir +++ b/mlir/test/Dialect/Tosa/canonicalize.mlir @@ -1212,3 +1212,18 @@ func.func @do_not_fold_intdiv_division_by_0() -> tensor<1x24x2xi32> { %16 = tosa.intdiv %4, %1 : (tensor<1x24x2xi32>, tensor<1x24x2xi32>) -> tensor<1x24x2xi32> return %16 : tensor<1x24x2xi32> } + + +// ----- +// CHECK-LABEL: func.func @slice_dynamic_size_static_output_canonicalize( +// CHECK-SAME: %[[ARG0:.*]]: tensor<2x60x59x?xf32>) -> tensor<2x60x58x?xf32> { +// CHECK: %[[START:.*]] = tosa.const_shape {values = dense<0> : tensor<4xindex>} : () -> !tosa.shape<4> +// CHECK: %[[SIZE:.*]] = tosa.const_shape {values = dense<[2, 60, 58, -1]> : tensor<4xindex>} : () -> !tosa.shape<4> +// CHECK: %[[SLICE:.*]] = tosa.slice %[[ARG0]], %[[START]], %[[SIZE]] : (tensor<2x60x59x?xf32>, !tosa.shape<4>, !tosa.shape<4>) -> tensor<2x60x58x?xf32> +// CHECK: return 
%[[SLICE]] +func.func @slice_dynamic_size_static_output_canonicalize(%arg0: tensor<2x60x59x?xf32>) -> tensor<2x60x58x?xf32> { + %0 = tosa.const_shape {values = dense<0> : tensor<4xindex>} : () -> !tosa.shape<4> + %1 = tosa.const_shape {values = dense<[-1, 60, 58, -1]> : tensor<4xindex>} : () -> !tosa.shape<4> + %2 = tosa.slice %arg0, %0, %1 : (tensor<2x60x59x?xf32>, !tosa.shape<4>, !tosa.shape<4>) -> tensor<2x60x58x?xf32> + return %2 : tensor<2x60x58x?xf32> + } diff --git a/mlir/test/IR/enum-attr-invalid.mlir b/mlir/test/IR/enum-attr-invalid.mlir index 923736f28dad..2f240a56c987 100644 --- a/mlir/test/IR/enum-attr-invalid.mlir +++ b/mlir/test/IR/enum-attr-invalid.mlir @@ -28,3 +28,78 @@ func.func @test_parse_invalid_attr() -> () { // expected-error@+1 {{failed to parse TestEnumAttr parameter 'value'}} test.op_with_enum 1 : index } + +// ----- + +func.func @test_non_keyword_prop_enum() -> () { + // expected-error@+2 {{expected keyword for a test enum}} + // expected-error@+1 {{invalid value for property value, expected a test enum}} + test.op_with_enum_prop 0 + return +} + +// ----- + +func.func @test_wrong_keyword_prop_enum() -> () { + // expected-error@+2 {{expected one of [first, second, third] for a test enum, got: fourth}} + // expected-error@+1 {{invalid value for property value, expected a test enum}} + test.op_with_enum_prop fourth +} + +// ----- + +func.func @test_bad_integer() -> () { + // expected-error@+1 {{op property 'value' failed to satisfy constraint: a test enum}} + "test.op_with_enum_prop"() <{value = 4 : i32}> {} : () -> () +} + +// ----- + +func.func @test_bit_enum_prop_not_keyword() -> () { + // expected-error@+2 {{expected keyword for a test bit enum}} + // expected-error@+1 {{invalid value for property value1, expected a test bit enum}} + test.op_with_bit_enum_prop 0 + return +} + +// ----- + +func.func @test_bit_enum_prop_wrong_keyword() -> () { + // expected-error@+2 {{expected one of [read, write, execute] for a test bit enum, got: 
chroot}} + // expected-error@+1 {{invalid value for property value1, expected a test bit enum}} + test.op_with_bit_enum_prop read, chroot : () + return +} + +// ----- + +func.func @test_bit_enum_prop_bad_value() -> () { + // expected-error@+1 {{op property 'value2' failed to satisfy constraint: a test bit enum}} + "test.op_with_bit_enum_prop"() <{value1 = 7 : i32, value2 = 8 : i32}> {} : () -> () + return +} + +// ----- + +func.func @test_bit_enum_prop_named_wrong_keyword() -> () { + // expected-error@+2 {{expected 'bit_enum'}} + // expected-error@+1 {{invalid value for property value1, expected a test bit enum}} + test.op_with_bit_enum_prop_named foo<read, execute> + return +} + +// ----- + +func.func @test_bit_enum_prop_named_not_open() -> () { + // expected-error@+2 {{expected '<'}} + // expected-error@+1 {{invalid value for property value1, expected a test bit enum}} + test.op_with_bit_enum_prop_named bit_enum read, execute> +} + +// ----- + +func.func @test_bit_enum_prop_named_not_closed() -> () { + // expected-error@+2 {{expected '>'}} + // expected-error@+1 {{invalid value for property value1, expected a test bit enum}} + test.op_with_bit_enum_prop_named bit_enum<read, execute + +} diff --git a/mlir/test/IR/enum-attr-roundtrip.mlir b/mlir/test/IR/enum-attr-roundtrip.mlir index 36e605bdbff4..f1f09f977b7d 100644 --- a/mlir/test/IR/enum-attr-roundtrip.mlir +++ b/mlir/test/IR/enum-attr-roundtrip.mlir @@ -35,3 +35,48 @@ func.func @test_match_op_with_bit_enum() -> () { test.op_with_bit_enum <execute, write> tag 0 : i32 return } + +// CHECK-LABEL: @test_enum_prop +func.func @test_enum_prop() -> () { + // CHECK: test.op_with_enum_prop first + test.op_with_enum_prop first + + // CHECK: test.op_with_enum_prop first + "test.op_with_enum_prop"() <{value = 0 : i32}> {} : () -> () + + // CHECK: test.op_with_enum_prop_attr_form <{value = 0 : i32}> + test.op_with_enum_prop_attr_form <{value = 0 : i32}> + // CHECK: test.op_with_enum_prop_attr_form <{value = 1 : i32}> + 
test.op_with_enum_prop_attr_form <{value = #test<enum second>}> + + // CHECK: test.op_with_enum_prop_attr_form_always <{value = #test<enum first>}> + test.op_with_enum_prop_attr_form_always <{value = #test<enum first>}> + // CHECK: test.op_with_enum_prop_attr_form_always <{value = #test<enum second>} + test.op_with_enum_prop_attr_form_always <{value = #test<enum second>}> + + return +} + +// CHECK-LABEL @test_bit_enum_prop() +func.func @test_bit_enum_prop() -> () { + // CHECK: test.op_with_bit_enum_prop read : () + test.op_with_bit_enum_prop read read : () + + // CHECK: test.op_with_bit_enum_prop read, write write, execute + test.op_with_bit_enum_prop read, write write, execute : () + + // CHECK: test.op_with_bit_enum_prop read, execute write + "test.op_with_bit_enum_prop"() <{value1 = 5 : i32, value2 = 2 : i32}> {} : () -> () + + // CHECK: test.op_with_bit_enum_prop read, write, execute + test.op_with_bit_enum_prop read, write, execute : () + + // CHECK: test.op_with_bit_enum_prop_named bit_enum<read>{{$}} + test.op_with_bit_enum_prop_named bit_enum<read> bit_enum<read> + // CHECK: test.op_with_bit_enum_prop_named bit_enum<read, write> bit_enum<write, execute> + test.op_with_bit_enum_prop_named bit_enum<read, write> bit_enum<write, execute> + // CHECK: test.op_with_bit_enum_prop_named bit_enum<read, write, execute> + test.op_with_bit_enum_prop_named bit_enum<read, write, execute> + + return +} diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 31be00ace138..85a49e05d4c7 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -424,6 +424,52 @@ def : Pat<(OpWithEnum ConstantEnumCase<TestEnumAttr, "first">:$value, ConstantAttr<I32Attr, "1">)>; //===----------------------------------------------------------------------===// +// Test Enum Properties +//===----------------------------------------------------------------------===// + +// Define the enum property. 
+def TestEnumProp : EnumProp<TestEnum>; +// Define an op that contains the enum property. +def OpWithEnumProp : TEST_Op<"op_with_enum_prop"> { + let arguments = (ins TestEnumProp:$value); + let assemblyFormat = "$value attr-dict"; +} + +def TestEnumPropAttrForm : EnumPropWithAttrForm<TestEnum, TestEnumAttr>; +def OpWithEnumPropAttrForm : TEST_Op<"op_with_enum_prop_attr_form"> { + let arguments = (ins TestEnumPropAttrForm:$value); + let assemblyFormat = "prop-dict attr-dict"; +} + +def TestEnumPropAttrFormAlways : EnumPropWithAttrForm<TestEnum, TestEnumAttr> { + let storeInCustomAttribute = 1; +} +def OpWithEnumPropAttrFormAlways : TEST_Op<"op_with_enum_prop_attr_form_always"> { + let arguments = (ins TestEnumPropAttrFormAlways:$value); + let assemblyFormat = "prop-dict attr-dict"; +} + +def TestBitEnumProp : EnumProp<TestBitEnum> { + let defaultValue = TestBitEnum.cppType # "::Read"; +} +def OpWithTestBitEnum : TEST_Op<"op_with_bit_enum_prop"> { + let arguments = (ins + TestBitEnumProp:$value1, + TestBitEnumProp:$value2); + let assemblyFormat = "$value1 ($value2^)? attr-dict `:` `(``)`"; +} + +def TestBitEnumPropNamed : NamedEnumProp<TestBitEnum, "bit_enum"> { + let defaultValue = TestBitEnum.cppType # "::Read"; +} +def OpWithBitEnumPropNamed : TEST_Op<"op_with_bit_enum_prop_named"> { + let arguments = (ins + TestBitEnumPropNamed:$value1, + TestBitEnumPropNamed:$value2); + let assemblyFormat = "$value1 ($value2^)? attr-dict"; +} + +//===----------------------------------------------------------------------===// // Test Bit Enum Attributes //===----------------------------------------------------------------------===// |
