diff options
| author | Vitaly Buka <vitalybuka@google.com> | 2024-09-18 16:47:13 -0700 |
|---|---|---|
| committer | Vitaly Buka <vitalybuka@google.com> | 2024-09-18 16:47:13 -0700 |
| commit | 7f1bd09ed8fa47d24ed69d3c1f97afc7535d4615 (patch) | |
| tree | b9e7e3aa2fe9fbc0a11d10aaa86f43ea6b94128c | |
| parent | f080291d3a56d5d6767af925782d5a30c52c93ae (diff) | |
| parent | 999313debe8a87760b128e4469f17ec0ce1a4a8f (diff) | |
Created using spr 1.3.4
540 files changed, 28842 insertions, 10580 deletions
diff --git a/bolt/test/perf2bolt/lit.local.cfg b/bolt/test/perf2bolt/lit.local.cfg index 4ee9ad08cc78..0fecf913aa98 100644 --- a/bolt/test/perf2bolt/lit.local.cfg +++ b/bolt/test/perf2bolt/lit.local.cfg @@ -1,4 +1,5 @@ import shutil +import subprocess -if shutil.which("perf") is not None: - config.available_features.add("perf")
\ No newline at end of file +if shutil.which("perf") is not None and subprocess.run(["perf", "record", "-e", "cycles:u", "-o", "/dev/null", "--", "perf", "--version"], capture_output=True).returncode == 0: + config.available_features.add("perf") diff --git a/clang-tools-extra/CODE_OWNERS.TXT b/clang-tools-extra/CODE_OWNERS.TXT index 4cf80aa2b0b8..2831ec7e25f5 100644 --- a/clang-tools-extra/CODE_OWNERS.TXT +++ b/clang-tools-extra/CODE_OWNERS.TXT @@ -23,7 +23,7 @@ D: clang-tidy N: Manuel Klimek E: klimek@google.com -D: clang-rename, all parts of clang-tools-extra not covered by someone else +D: all parts of clang-tools-extra not covered by someone else N: Sam McCall E: sammccall@google.com diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp index 200bb87a5ac3..4c75b4227011 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp @@ -380,7 +380,6 @@ void ClangTidyDiagnosticConsumer::HandleDiagnostic( ++Context.Stats.ErrorsIgnoredNOLINT; // Ignored a warning, should ignore related notes as well LastErrorWasIgnored = true; - Context.DiagEngine->Clear(); for (const auto &Error : SuppressionErrors) Context.diag(Error); return; @@ -457,7 +456,6 @@ void ClangTidyDiagnosticConsumer::HandleDiagnostic( if (Info.hasSourceManager()) checkFilters(Info.getLocation(), Info.getSourceManager()); - Context.DiagEngine->Clear(); for (const auto &Error : SuppressionErrors) Context.diag(Error); } diff --git a/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp b/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp index 89790ea70cf2..98778192dbd3 100644 --- a/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp @@ -9,6 +9,7 @@ #include "AvoidCArraysCheck.h" #include "clang/AST/ASTContext.h" #include 
"clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" using namespace clang::ast_matchers; @@ -60,6 +61,7 @@ void AvoidCArraysCheck::registerMatchers(MatchFinder *Finder) { Finder->addMatcher( typeLoc(hasValidBeginLoc(), hasType(arrayType()), + optionally(hasParent(parmVarDecl().bind("param_decl"))), unless(anyOf(hasParent(parmVarDecl(isArgvOfMain())), hasParent(varDecl(isExternC())), hasParent(fieldDecl( @@ -72,11 +74,28 @@ void AvoidCArraysCheck::registerMatchers(MatchFinder *Finder) { void AvoidCArraysCheck::check(const MatchFinder::MatchResult &Result) { const auto *ArrayType = Result.Nodes.getNodeAs<TypeLoc>("typeloc"); - + const bool IsInParam = + Result.Nodes.getNodeAs<ParmVarDecl>("param_decl") != nullptr; + const bool IsVLA = ArrayType->getTypePtr()->isVariableArrayType(); + enum class RecommendType { Array, Vector, Span }; + llvm::SmallVector<const char *> RecommendTypes{}; + if (IsVLA) { + RecommendTypes.push_back("std::vector<>"); + } else if (ArrayType->getTypePtr()->isIncompleteArrayType() && IsInParam) { + // in function parameter, we also don't know the size of + // IncompleteArrayType. 
+ if (Result.Context->getLangOpts().CPlusPlus20) + RecommendTypes.push_back("std::span<>"); + else { + RecommendTypes.push_back("std::array<>"); + RecommendTypes.push_back("std::vector<>"); + } + } else { + RecommendTypes.push_back("std::array<>"); + } diag(ArrayType->getBeginLoc(), - "do not declare %select{C-style|C VLA}0 arrays, use " - "%select{std::array<>|std::vector<>}0 instead") - << ArrayType->getTypePtr()->isVariableArrayType(); + "do not declare %select{C-style|C VLA}0 arrays, use %1 instead") + << IsVLA << llvm::join(RecommendTypes, " or "); } } // namespace clang::tidy::modernize diff --git a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp index a1786ba5acfd..1c6a1618ebbc 100644 --- a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp @@ -262,7 +262,7 @@ StatementMatcher makeIteratorLoopMatcher(bool IsReverse) { /// EndVarName: 'j' (as a VarDecl) /// In the second example only: /// EndCallName: 'container.size()' (as a CXXMemberCallExpr) or -/// 'size(contaner)' (as a CallExpr) +/// 'size(container)' (as a CallExpr) /// /// Client code will need to make sure that: /// - The containers on which 'size()' is called is the container indexed. @@ -491,7 +491,7 @@ static bool isDirectMemberExpr(const Expr *E) { } /// Given an expression that represents an usage of an element from the -/// containter that we are iterating over, returns false when it can be +/// container that we are iterating over, returns false when it can be /// guaranteed this element cannot be modified as a result of this usage. 
static bool canBeModified(ASTContext *Context, const Expr *E) { if (E->getType().isConstQualified()) @@ -922,7 +922,7 @@ bool LoopConvertCheck::isConvertible(ASTContext *Context, const ast_matchers::BoundNodes &Nodes, const ForStmt *Loop, LoopFixerKind FixerKind) { - // In self contained diagnosics mode we don't want dependancies on other + // In self contained diagnostic mode we don't want dependencies on other // loops, otherwise, If we already modified the range of this for loop, don't // do any further updates on this iteration. if (areDiagsSelfContained()) diff --git a/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp index dbb50a060e59..698231d777d2 100644 --- a/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp @@ -13,30 +13,40 @@ using namespace clang::ast_matchers; namespace clang::tidy::readability { - void ContainerContainsCheck::registerMatchers(MatchFinder *Finder) { - const auto SupportedContainers = hasType( - hasUnqualifiedDesugaredType(recordType(hasDeclaration(cxxRecordDecl( - hasAnyName("::std::set", "::std::unordered_set", "::std::map", - "::std::unordered_map", "::std::multiset", - "::std::unordered_multiset", "::std::multimap", - "::std::unordered_multimap")))))); + const auto HasContainsMatchingParamType = hasMethod( + cxxMethodDecl(isConst(), parameterCountIs(1), returns(booleanType()), + hasName("contains"), unless(isDeleted()), isPublic(), + hasParameter(0, hasType(hasUnqualifiedDesugaredType( + equalsBoundNode("parameterType")))))); const auto CountCall = - cxxMemberCallExpr(on(SupportedContainers), - callee(cxxMethodDecl(hasName("count"))), - argumentCountIs(1)) + cxxMemberCallExpr( + argumentCountIs(1), + callee(cxxMethodDecl( + hasName("count"), + hasParameter(0, hasType(hasUnqualifiedDesugaredType( + type().bind("parameterType")))), + 
ofClass(cxxRecordDecl(HasContainsMatchingParamType))))) .bind("call"); const auto FindCall = - cxxMemberCallExpr(on(SupportedContainers), - callee(cxxMethodDecl(hasName("find"))), - argumentCountIs(1)) + cxxMemberCallExpr( + argumentCountIs(1), + callee(cxxMethodDecl( + hasName("find"), + hasParameter(0, hasType(hasUnqualifiedDesugaredType( + type().bind("parameterType")))), + ofClass(cxxRecordDecl(HasContainsMatchingParamType))))) .bind("call"); - const auto EndCall = cxxMemberCallExpr(on(SupportedContainers), - callee(cxxMethodDecl(hasName("end"))), - argumentCountIs(0)); + const auto EndCall = cxxMemberCallExpr( + argumentCountIs(0), + callee( + cxxMethodDecl(hasName("end"), + // In the matchers below, FindCall should always appear + // before EndCall so 'parameterType' is properly bound. + ofClass(cxxRecordDecl(HasContainsMatchingParamType))))); const auto Literal0 = integerLiteral(equals(0)); const auto Literal1 = integerLiteral(equals(1)); diff --git a/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h index 2e8276d684cd..753603ed8253 100644 --- a/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h +++ b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h @@ -13,8 +13,9 @@ namespace clang::tidy::readability { -/// Finds usages of `container.count()` and `find() == end()` which should be -/// replaced by a call to the `container.contains()` method introduced in C++20. +/// Finds usages of `container.count()` and +/// `container.find() == container.end()` which should be replaced by a call +/// to the `container.contains()` method. 
/// /// For the user-facing documentation see: /// http://clang.llvm.org/extra/clang-tidy/checks/readability/container-contains.html @@ -24,10 +25,11 @@ public: : ClangTidyCheck(Name, Context) {} void registerMatchers(ast_matchers::MatchFinder *Finder) final; void check(const ast_matchers::MatchFinder::MatchResult &Result) final; - -protected: bool isLanguageVersionSupported(const LangOptions &LO) const final { - return LO.CPlusPlus20; + return LO.CPlusPlus; + } + std::optional<TraversalKind> getCheckTraversalKind() const override { + return TK_AsIs; } }; diff --git a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp index 021d731f8f17..cf9b42828568 100644 --- a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp +++ b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp @@ -305,33 +305,33 @@ TEST_F(ConfigCompileTests, DiagnosticSuppression) { { auto D = DiagEngine.Report(diag::warn_unreachable); EXPECT_TRUE(isDiagnosticSuppressed( - Diag{&DiagEngine}, Conf.Diagnostics.Suppress, LangOptions())); + Diag{&DiagEngine, D}, Conf.Diagnostics.Suppress, LangOptions())); } // Subcategory not respected/suppressed. 
{ auto D = DiagEngine.Report(diag::warn_unreachable_break); EXPECT_FALSE(isDiagnosticSuppressed( - Diag{&DiagEngine}, Conf.Diagnostics.Suppress, LangOptions())); + Diag{&DiagEngine, D}, Conf.Diagnostics.Suppress, LangOptions())); } { auto D = DiagEngine.Report(diag::warn_unused_variable); EXPECT_TRUE(isDiagnosticSuppressed( - Diag{&DiagEngine}, Conf.Diagnostics.Suppress, LangOptions())); + Diag{&DiagEngine, D}, Conf.Diagnostics.Suppress, LangOptions())); } { auto D = DiagEngine.Report(diag::err_typecheck_bool_condition); EXPECT_TRUE(isDiagnosticSuppressed( - Diag{&DiagEngine}, Conf.Diagnostics.Suppress, LangOptions())); + Diag{&DiagEngine, D}, Conf.Diagnostics.Suppress, LangOptions())); } { auto D = DiagEngine.Report(diag::err_unexpected_friend); EXPECT_TRUE(isDiagnosticSuppressed( - Diag{&DiagEngine}, Conf.Diagnostics.Suppress, LangOptions())); + Diag{&DiagEngine, D}, Conf.Diagnostics.Suppress, LangOptions())); } { auto D = DiagEngine.Report(diag::warn_alloca); EXPECT_TRUE(isDiagnosticSuppressed( - Diag{&DiagEngine}, Conf.Diagnostics.Suppress, LangOptions())); + Diag{&DiagEngine, D}, Conf.Diagnostics.Suppress, LangOptions())); } Frag.Diagnostics.Suppress.emplace_back("*"); diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 79501b563b4e..82a761bd7f40 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -87,9 +87,6 @@ Improvements to clang-doc Improvements to clang-query --------------------------- -Improvements to clang-rename ----------------------------- - The improvements are... Improvements to clang-tidy @@ -137,6 +134,11 @@ Changes in existing checks <clang-tidy/checks/misc/definitions-in-headers>` check by rewording the diagnostic note that suggests adding ``inline``. 
+- Improved :doc:`modernize-avoid-c-arrays + <clang-tidy/checks/modernize/avoid-c-arrays>` check to suggest using ``std::span`` + as a replacement for parameters of incomplete C array type in C++20 and + ``std::array`` or ``std::vector`` before C++20. + - Improved :doc:`modernize-use-std-format <clang-tidy/checks/modernize/use-std-format>` check to support replacing member function calls too. @@ -163,6 +165,10 @@ Changes in existing checks <clang-tidy/checks/performance/avoid-endl>` check to use ``std::endl`` as placeholder when lexer cannot get source text. +- Improved :doc:`readability-container-contains + <clang-tidy/checks/readability/container-contains>` check to let it work on + any class that has a ``contains`` method. + - Improved :doc:`readability-implicit-bool-conversion <clang-tidy/checks/readability/implicit-bool-conversion>` check by adding the option `UseUpperCaseLiteralSuffix` to select the diff --git a/clang-tools-extra/docs/clang-rename.rst b/clang-tools-extra/docs/clang-rename.rst deleted file mode 100644 index e13d8c3ad25f..000000000000 --- a/clang-tools-extra/docs/clang-rename.rst +++ /dev/null @@ -1,168 +0,0 @@ -============ -Clang-Rename -============ - -.. contents:: - -See also: - -.. toctree:: - :maxdepth: 1 - - -:program:`clang-rename` is a C++ refactoring tool. Its purpose is to perform -efficient renaming actions in large-scale projects such as renaming classes, -functions, variables, arguments, namespaces etc. - -The tool is in a very early development stage, so you might encounter bugs and -crashes. Submitting reports with information about how to reproduce the issue -to `the LLVM bugtracker <https://bugs.llvm.org>`_ will definitely help the -project. If you have any ideas or suggestions, you might want to put a feature -request there. 
- -Using Clang-Rename -================== - -:program:`clang-rename` is a `LibTooling -<https://clang.llvm.org/docs/LibTooling.html>`_-based tool, and it's easier to -work with if you set up a compile command database for your project (for an -example of how to do this see `How To Setup Tooling For LLVM -<https://clang.llvm.org/docs/HowToSetupToolingForLLVM.html>`_). You can also -specify compilation options on the command line after `--`: - -.. code-block:: console - - $ clang-rename -offset=42 -new-name=foo test.cpp -- -Imy_project/include -DMY_DEFINES ... - - -To get an offset of a symbol in a file run - -.. code-block:: console - - $ grep -FUbo 'foo' file.cpp - - -The tool currently supports renaming actions inside a single translation unit -only. It is planned to extend the tool's functionality to support multi-TU -renaming actions in the future. - -:program:`clang-rename` also aims to be easily integrated into popular text -editors, such as Vim and Emacs, and improve the workflow of users. - -Although a command line interface exists, it is highly recommended to use the -text editor interface instead for better experience. - -You can also identify one or more symbols to be renamed by giving the fully -qualified name: - -.. code-block:: console - - $ clang-rename -qualified-name=foo -new-name=bar test.cpp - -Renaming multiple symbols at once is supported, too. However, -:program:`clang-rename` doesn't accept both `-offset` and `-qualified-name` at -the same time. So, you can either specify multiple `-offset` or -`-qualified-name`. - -.. code-block:: console - - $ clang-rename -offset=42 -new-name=bar1 -offset=150 -new-name=bar2 test.cpp - -or - -.. code-block:: console - - $ clang-rename -qualified-name=foo1 -new-name=bar1 -qualified-name=foo2 -new-name=bar2 test.cpp - - -Alternatively, {offset | qualified-name} / new-name pairs can be put into a YAML -file: - -.. code-block:: yaml - - --- - - Offset: 42 - NewName: bar1 - - Offset: 150 - NewName: bar2 - ... 
- -or - -.. code-block:: yaml - - --- - - QualifiedName: foo1 - NewName: bar1 - - QualifiedName: foo2 - NewName: bar2 - ... - -That way you can avoid spelling out all the names as command line arguments: - -.. code-block:: console - - $ clang-rename -input=test.yaml test.cpp - -:program:`clang-rename` offers the following options: - -.. code-block:: console - - $ clang-rename --help - USAGE: clang-rename [subcommand] [options] <source0> [... <sourceN>] - - OPTIONS: - - Generic Options: - - -help - Display available options (-help-hidden for more) - -help-list - Display list of available options (-help-list-hidden for more) - -version - Display the version of this program - - clang-rename common options: - - -export-fixes=<filename> - YAML file to store suggested fixes in. - -extra-arg=<string> - Additional argument to append to the compiler command line - Can be used several times. - -extra-arg-before=<string> - Additional argument to prepend to the compiler command line - Can be used several times. - -force - Ignore nonexistent qualified names. - -i - Overwrite edited <file>s. - -input=<string> - YAML file to load oldname-newname pairs from. - -new-name=<string> - The new name to change the symbol to. - -offset=<uint> - Locates the symbol by offset as opposed to <line>:<column>. - -p <string> - Build path - -pl - Print the locations affected by renaming to stderr. - -pn - Print the found symbol's name prior to renaming to stderr. - -qualified-name=<string> - The fully qualified name of the symbol. - -Vim Integration -=============== - -You can call :program:`clang-rename` directly from Vim! To set up -:program:`clang-rename` integration for Vim see -`clang/tools/clang-rename/clang-rename.py -<https://github.com/llvm/llvm-project/blob/main/clang/tools/clang-rename/clang-rename.py>`_. - -Please note that **you have to save all buffers, in which the replacement will -happen before running the tool**. 
- -Once installed, you can point your cursor to symbols you want to rename, press -`<leader>cr` and type new desired name. The `<leader> key -<http://vim.wikia.com/wiki/Mapping_keys_in_Vim_-_Tutorial_(Part_3)#Map_leader>`_ -is a reference to a specific key defined by the mapleader variable and is bound -to backslash by default. - -Emacs Integration -================= - -You can also use :program:`clang-rename` while using Emacs! To set up -:program:`clang-rename` integration for Emacs see -`clang-rename/tool/clang-rename.el -<https://github.com/llvm/llvm-project/blob/main/clang/tools/clang-rename/clang-rename.el>`_. - -Once installed, you can point your cursor to symbols you want to rename, press -`M-X`, type `clang-rename` and new desired name. - -Please note that **you have to save all buffers, in which the replacement will -happen before running the tool**. diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/avoid-c-arrays.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/avoid-c-arrays.rst index 8f13ca4466a3..2d72352989ab 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/modernize/avoid-c-arrays.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/avoid-c-arrays.rst @@ -10,6 +10,9 @@ modernize-avoid-c-arrays Finds C-style array types and recommend to use ``std::array<>`` / ``std::vector<>``. All types of C arrays are diagnosed. +For incomplete C-style array types appearing in parameters, it would be better +to use ``std::span`` / ``gsl::span`` as a replacement. + However, fix-it are potentially dangerous in header files and are therefore not emitted right now. 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability/container-contains.rst b/clang-tools-extra/docs/clang-tidy/checks/readability/container-contains.rst index b28daecf7a2c..1cfbf4c511c5 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/readability/container-contains.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/readability/container-contains.rst @@ -3,23 +3,31 @@ readability-container-contains ============================== -Finds usages of ``container.count()`` and ``container.find() == container.end()`` which should be replaced by a call to the ``container.contains()`` method introduced in C++20. +Finds usages of ``container.count()`` and +``container.find() == container.end()`` which should be replaced by a call to +the ``container.contains()`` method. -Whether an element is contained inside a container should be checked with ``contains`` instead of ``count``/``find`` because ``contains`` conveys the intent more clearly. Furthermore, for containers which permit multiple entries per key (``multimap``, ``multiset``, ...), ``contains`` is more efficient than ``count`` because ``count`` has to do unnecessary additional work. +Whether an element is contained inside a container should be checked with +``contains`` instead of ``count``/``find`` because ``contains`` conveys the +intent more clearly. Furthermore, for containers which permit multiple entries +per key (``multimap``, ``multiset``, ...), ``contains`` is more efficient than +``count`` because ``count`` has to do unnecessary additional work. 
Examples: -=========================================== ============================== -Initial expression Result -------------------------------------------- ------------------------------ -``myMap.find(x) == myMap.end()`` ``!myMap.contains(x)`` -``myMap.find(x) != myMap.end()`` ``myMap.contains(x)`` -``if (myMap.count(x))`` ``if (myMap.contains(x))`` -``bool exists = myMap.count(x)`` ``bool exists = myMap.contains(x)`` -``bool exists = myMap.count(x) > 0`` ``bool exists = myMap.contains(x)`` -``bool exists = myMap.count(x) >= 1`` ``bool exists = myMap.contains(x)`` -``bool missing = myMap.count(x) == 0`` ``bool missing = !myMap.contains(x)`` -=========================================== ============================== +====================================== ===================================== +Initial expression Result +-------------------------------------- ------------------------------------- +``myMap.find(x) == myMap.end()`` ``!myMap.contains(x)`` +``myMap.find(x) != myMap.end()`` ``myMap.contains(x)`` +``if (myMap.count(x))`` ``if (myMap.contains(x))`` +``bool exists = myMap.count(x)`` ``bool exists = myMap.contains(x)`` +``bool exists = myMap.count(x) > 0`` ``bool exists = myMap.contains(x)`` +``bool exists = myMap.count(x) >= 1`` ``bool exists = myMap.contains(x)`` +``bool missing = myMap.count(x) == 0`` ``bool missing = !myMap.contains(x)`` +====================================== ===================================== -This check applies to ``std::set``, ``std::unordered_set``, ``std::map``, ``std::unordered_map`` and the corresponding multi-key variants. -It is only active for C++20 and later, as the ``contains`` method was only added in C++20. +This check will apply to any class that has a ``contains`` method, notably +including ``std::set``, ``std::unordered_set``, ``std::map``, and +``std::unordered_map`` as of C++20, and ``std::string`` and ``std::string_view`` +as of C++23. 
diff --git a/clang-tools-extra/docs/index.rst b/clang-tools-extra/docs/index.rst index d5c00b89a155..9f7324fcf741 100644 --- a/clang-tools-extra/docs/index.rst +++ b/clang-tools-extra/docs/index.rst @@ -19,7 +19,6 @@ Contents clang-include-fixer modularize pp-trace - clang-rename clangd <https://clangd.llvm.org/> clang-doc diff --git a/clang-tools-extra/test/CMakeLists.txt b/clang-tools-extra/test/CMakeLists.txt index 0953ff2531e1..d72a117166a0 100644 --- a/clang-tools-extra/test/CMakeLists.txt +++ b/clang-tools-extra/test/CMakeLists.txt @@ -28,9 +28,6 @@ configure_lit_site_cfg( ) set(CLANG_TOOLS_TEST_DEPS - # For the clang-apply-replacements test that uses clang-rename. - clang-rename - # For the clang-doc tests that emit bitcode files. llvm-bcanalyzer diff --git a/clang-tools-extra/test/clang-apply-replacements/ClangRenameClassReplacements.cpp b/clang-tools-extra/test/clang-apply-replacements/ClangRenameClassReplacements.cpp deleted file mode 100644 index 2b478bbf900d..000000000000 --- a/clang-tools-extra/test/clang-apply-replacements/ClangRenameClassReplacements.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// RUN: rm -rf %t -// RUN: mkdir -p %t/fixes -// RUN: cat %s > %t.cpp -// RUN: clang-rename -offset=254 -new-name=Bar -export-fixes=%t/fixes/clang-rename.yaml %t.cpp -- -// RUN: clang-apply-replacements %t -// RUN: sed 's,//.*,,' %t.cpp | FileCheck %s - -class Foo {}; // CHECK: class Bar {}; - -// Use grep -FUbo 'Foo' <file> to get the correct offset of Cla when changing -// this file. 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-c++20.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-c++20.cpp new file mode 100644 index 000000000000..e53cfeba0e46 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-c++20.cpp @@ -0,0 +1,11 @@ +// RUN: %check_clang_tidy -std=c++20 %s modernize-avoid-c-arrays %t + +int f1(int data[], int size) { + // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: do not declare C-style arrays, use std::span<> instead + int f4[] = {1, 2}; + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use std::array<> instead +} + +int f2(int data[100]) { + // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: do not declare C-style arrays, use std::array<> instead +} diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-main.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-main.cpp index 6549422f393a..ad12b3d6f95b 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-main.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-main.cpp @@ -1,7 +1,7 @@ -// RUN: %check_clang_tidy %s modernize-avoid-c-arrays %t +// RUN: %check_clang_tidy -std=c++17 %s modernize-avoid-c-arrays %t int not_main(int argc, char *argv[]) { - // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: do not declare C-style arrays, use std::array<> instead + // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: do not declare C-style arrays, use std::array<> or std::vector<> instead int f4[] = {1, 2}; // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use std::array<> instead } @@ -11,7 +11,7 @@ int main(int argc, char *argv[]) { // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use std::array<> instead auto not_main = [](int argc, char *argv[]) { - // CHECK-MESSAGES: :[[@LINE-1]]:32: warning: do not declare 
C-style arrays, use std::array<> instead + // CHECK-MESSAGES: :[[@LINE-1]]:32: warning: do not declare C-style arrays, use std::array<> or std::vector<> instead int f6[] = {1, 2}; // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: do not declare C-style arrays, use std::array<> instead }; diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-strings.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-strings.cpp index f6d64848f9e3..b607068f5b7c 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-strings.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-strings.cpp @@ -1,4 +1,4 @@ -// RUN: %check_clang_tidy %s modernize-avoid-c-arrays %t -- \ +// RUN: %check_clang_tidy -std=c++17 %s modernize-avoid-c-arrays %t -- \ // RUN: -config='{CheckOptions: { modernize-avoid-c-arrays.AllowStringArrays: true }}' const char name[] = "name"; @@ -6,4 +6,4 @@ const char array[] = {'n', 'a', 'm', 'e', '\0'}; // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: do not declare C-style arrays, use std::array<> instead [modernize-avoid-c-arrays] void takeCharArray(const char name[]); -// CHECK-MESSAGES: :[[@LINE-1]]:26: warning: do not declare C-style arrays, use std::array<> instead [modernize-avoid-c-arrays] +// CHECK-MESSAGES: :[[@LINE-1]]:26: warning: do not declare C-style arrays, use std::array<> or std::vector<> instead [modernize-avoid-c-arrays] diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-three-arg-main.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-three-arg-main.cpp index 22a4016f79f4..c04edf2b5aea 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-three-arg-main.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-three-arg-main.cpp @@ -1,8 +1,8 @@ -// RUN: %check_clang_tidy %s modernize-avoid-c-arrays %t 
+// RUN: %check_clang_tidy -std=c++17 %s modernize-avoid-c-arrays %t int not_main(int argc, char *argv[], char *argw[]) { - // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: do not declare C-style arrays, use std::array<> instead - // CHECK-MESSAGES: :[[@LINE-2]]:38: warning: do not declare C-style arrays, use std::array<> instead + // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: do not declare C-style arrays, use std::array<> or std::vector<> instead + // CHECK-MESSAGES: :[[@LINE-2]]:38: warning: do not declare C-style arrays, use std::array<> or std::vector<> instead int f4[] = {1, 2}; // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use std::array<> instead } @@ -12,8 +12,8 @@ int main(int argc, char *argv[], char *argw[]) { // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use std::array<> instead auto not_main = [](int argc, char *argv[], char *argw[]) { - // CHECK-MESSAGES: :[[@LINE-1]]:32: warning: do not declare C-style arrays, use std::array<> instead - // CHECK-MESSAGES: :[[@LINE-2]]:46: warning: do not declare C-style arrays, use std::array<> instead + // CHECK-MESSAGES: :[[@LINE-1]]:32: warning: do not declare C-style arrays, use std::array<> or std::vector<> instead + // CHECK-MESSAGES: :[[@LINE-2]]:46: warning: do not declare C-style arrays, use std::array<> or std::vector<> instead int f6[] = {1, 2}; // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: do not declare C-style arrays, use std::array<> instead }; diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays.cpp index ce99f0821b22..b0aaa4962a83 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays.cpp @@ -1,4 +1,4 @@ -// RUN: %check_clang_tidy %s modernize-avoid-c-arrays %t +// RUN: %check_clang_tidy -std=c++17 %s modernize-avoid-c-arrays %t int a[] = {1, 2}; 
// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: do not declare C-style arrays, use std::array<> instead @@ -91,4 +91,4 @@ const char name[] = "Some string"; // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: do not declare C-style arrays, use std::array<> instead [modernize-avoid-c-arrays] void takeCharArray(const char name[]); -// CHECK-MESSAGES: :[[@LINE-1]]:26: warning: do not declare C-style arrays, use std::array<> instead [modernize-avoid-c-arrays] +// CHECK-MESSAGES: :[[@LINE-1]]:26: warning: do not declare C-style arrays, use std::array<> or std::vector<> instead [modernize-avoid-c-arrays] diff --git a/clang-tools-extra/test/clang-tidy/checkers/performance/unnecessary-value-param-crash.cpp b/clang-tools-extra/test/clang-tidy/checkers/performance/unnecessary-value-param-crash.cpp new file mode 100644 index 000000000000..99c2fe905bdf --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/performance/unnecessary-value-param-crash.cpp @@ -0,0 +1,23 @@ +// RUN: %check_clang_tidy -std=c++14-or-later %s performance-unnecessary-value-param %t + +// The test case used to crash clang-tidy. +// https://github.com/llvm/llvm-project/issues/108963 + +struct A +{ + template<typename T> A(T&&) {} +}; + +struct B +{ + ~B(); +}; + +struct C +{ + A a; + C(B, int i) : a(i) {} + // CHECK-MESSAGES: [[@LINE-1]]:6: warning: the parameter #1 is copied for each invocation but only used as a const reference; consider making it a const reference +}; + +C c(B(), 0); diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/container-contains.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/container-contains.cpp index 0ecb61b2e7df..906515b075d4 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/container-contains.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/container-contains.cpp @@ -240,7 +240,7 @@ int testMacroExpansion(std::unordered_set<int> &MySet) { return 0; } -// The following map has the same interface like `std::map`. 
+// The following map has the same interface as `std::map`. template <class Key, class T> struct CustomMap { unsigned count(const Key &K) const; @@ -249,13 +249,180 @@ struct CustomMap { void *end(); }; -// The clang-tidy check is currently hard-coded against the `std::` containers -// and hence won't annotate the following instance. We might change this in the -// future and also detect the following case. -void *testDifferentCheckTypes(CustomMap<int, int> &MyMap) { - if (MyMap.count(0)) - // NO-WARNING. - // CHECK-FIXES: if (MyMap.count(0)) - return nullptr; - return MyMap.find(2); +void testDifferentCheckTypes(CustomMap<int, int> &MyMap) { + if (MyMap.count(0)) {}; + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: if (MyMap.contains(0)) {}; + + MyMap.find(0) != MyMap.end(); + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: MyMap.contains(0); +} + +struct MySubmap : public CustomMap<int, int> {}; + +void testSubclass(MySubmap& MyMap) { + if (MyMap.count(0)) {}; + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: if (MyMap.contains(0)) {}; +} + +using UsingMap = CustomMap<int, int>; +struct MySubmap2 : public UsingMap {}; +using UsingMap2 = MySubmap2; + +void testUsing(UsingMap2& MyMap) { + if (MyMap.count(0)) {}; + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: if (MyMap.contains(0)) {}; +} + +template <class Key, class T> +struct CustomMapContainsDeleted { + unsigned count(const Key &K) const; + bool contains(const Key &K) const = delete; + void *find(const Key &K); + void *end(); +}; + +struct SubmapContainsDeleted : public CustomMapContainsDeleted<int, int> {}; + +void 
testContainsDeleted(CustomMapContainsDeleted<int, int> &MyMap, + SubmapContainsDeleted &MyMap2) { + // No warning if the `contains` method is deleted. + if (MyMap.count(0)) {}; + if (MyMap2.count(0)) {}; +} + +template <class Key, class T> +struct CustomMapPrivateContains { + unsigned count(const Key &K) const; + void *find(const Key &K); + void *end(); + +private: + bool contains(const Key &K) const; +}; + +struct SubmapPrivateContains : public CustomMapPrivateContains<int, int> {}; + +void testPrivateContains(CustomMapPrivateContains<int, int> &MyMap, + SubmapPrivateContains &MyMap2) { + // No warning if the `contains` method is not public. + if (MyMap.count(0)) {}; + if (MyMap2.count(0)) {}; +} + +struct MyString {}; + +struct WeirdNonMatchingContains { + unsigned count(char) const; + bool contains(const MyString&) const; +}; + +void testWeirdNonMatchingContains(WeirdNonMatchingContains &MyMap) { + // No warning if there is no `contains` method with the right type. + if (MyMap.count('a')) {}; +} + +template <class T> +struct SmallPtrSet { + using ConstPtrType = const T*; + unsigned count(ConstPtrType Ptr) const; + bool contains(ConstPtrType Ptr) const; +}; + +template <class T> +struct SmallPtrPtrSet { + using ConstPtrType = const T**; + unsigned count(ConstPtrType Ptr) const; + bool contains(ConstPtrType Ptr) const; +}; + +template <class T> +struct SmallPtrPtrPtrSet { + using ConstPtrType = const T***; + unsigned count(ConstPtrType Ptr) const; + bool contains(ConstPtrType Ptr) const; +}; + +void testSmallPtrSet(const int ***Ptr, + SmallPtrSet<int> &MySet, + SmallPtrPtrSet<int> &MySet2, + SmallPtrPtrPtrSet<int> &MySet3) { + if (MySet.count(**Ptr)) {}; + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: if (MySet.contains(**Ptr)) {}; + if (MySet2.count(*Ptr)) {}; + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership 
[readability-container-contains] + // CHECK-FIXES: if (MySet2.contains(*Ptr)) {}; + if (MySet3.count(Ptr)) {}; + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: if (MySet3.contains(Ptr)) {}; +} + +struct X {}; +struct Y : public X {}; + +void testSubclassEntry(SmallPtrSet<X>& Set, Y* Entry) { + if (Set.count(Entry)) {} + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: if (Set.contains(Entry)) {} +} + +struct WeirdPointerApi { + unsigned count(int** Ptr) const; + bool contains(int* Ptr) const; +}; + +void testWeirdApi(WeirdPointerApi& Set, int* E) { + if (Set.count(&E)) {} +} + +void testIntUnsigned(std::set<int>& S, unsigned U) { + if (S.count(U)) {} + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: if (S.contains(U)) {} +} + +template <class T> +struct CustomSetConvertible { + unsigned count(const T &K) const; + bool contains(const T &K) const; +}; + +struct A {}; +struct B { B() = default; B(const A&) {} }; +struct C { operator A() const; }; + +void testConvertibleTypes() { + CustomSetConvertible<B> MyMap; + if (MyMap.count(A())) {}; + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: if (MyMap.contains(A())) {}; + + CustomSetConvertible<A> MyMap2; + if (MyMap2.count(C())) {}; + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: if (MyMap2.contains(C())) {}; + + if (MyMap2.count(C()) != 0) {}; + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: if (MyMap2.contains(C())) {}; +} + +template<class U> +using Box = 
const U& ; + +template <class T> +struct CustomBoxedSet { + unsigned count(Box<T> K) const; + bool contains(Box<T> K) const; +}; + +void testBox() { + CustomBoxedSet<int> Set; + if (Set.count(0)) {}; + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: if (Set.contains(0)) {}; } diff --git a/clang/docs/ClangFormattedStatus.rst b/clang/docs/ClangFormattedStatus.rst index 0ee0782879ef..b917e077679b 100644 --- a/clang/docs/ClangFormattedStatus.rst +++ b/clang/docs/ClangFormattedStatus.rst @@ -809,11 +809,6 @@ tree in terms of conformance to :doc:`ClangFormat` as of: March 06, 2022 17:32:2 - `4` - `0` - :good:`100%` - * - clang/tools/clang-rename - - `1` - - `1` - - `0` - - :good:`100%` * - clang/tools/clang-repl - `1` - `1` diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index c08697282cbf..f62f90fb9650 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -667,6 +667,7 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in T __builtin_elementwise_log(T x) return the natural logarithm of x floating point types T __builtin_elementwise_log2(T x) return the base 2 logarithm of x floating point types T __builtin_elementwise_log10(T x) return the base 10 logarithm of x floating point types + T __builtin_elementwise_popcount(T x) return the number of 1 bits in x integer types T __builtin_elementwise_pow(T x, T y) return x raised to the power of y floating point types T __builtin_elementwise_bitreverse(T x) return the integer represented after reversing the bits of x integer types T __builtin_elementwise_exp(T x) returns the base-e exponential, e^x, of the specified value floating point types diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index d92b59334f8f..d10b28431007 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -39,6 +39,8 @@ code 
bases. - The ``le32`` and ``le64`` targets have been removed. +- The ``clang-rename`` tool has been removed. + C/C++ Language Potentially Breaking Changes ------------------------------------------- @@ -114,6 +116,7 @@ C++ Language Changes - Accept C++26 user-defined ``static_assert`` messages in C++11 as an extension. +- Add ``__builtin_elementwise_popcount`` builtin for integer types only. C++2c Feature Support ^^^^^^^^^^^^^^^^^^^^^ @@ -252,7 +255,10 @@ Attribute Changes in Clang (#GH106864) - Introduced a new attribute ``[[clang::coro_await_elidable]]`` on coroutine return types - to express elideability at call sites where the coroutine is co_awaited as a prvalue. + to express elideability at call sites where the coroutine is invoked under a safe elide context. + +- Introduced a new attribute ``[[clang::coro_await_elidable_argument]]`` on function parameters + to propagate safe elide context to arguments if such function is also under a safe elide context. Improvements to Clang's diagnostics ----------------------------------- @@ -392,8 +398,11 @@ Bug Fixes to C++ Support - A follow-up fix was added for (#GH61460), as the previous fix was not entirely correct. (#GH86361) - Fixed a crash in the typo correction of an invalid CTAD guide. (#GH107887) - Fixed a crash when clang tries to subtitute parameter pack while retaining the parameter - pack. #GH63819, #GH107560 + pack. (#GH63819), (#GH107560) - Fix a crash when a static assert declaration has an invalid close location. (#GH108687) +- Avoided a redundant friend declaration instantiation under a certain ``consteval`` context. (#GH107175) +- Fixed an assertion failure in debug mode, and potential crashes in release mode, when + diagnosing a failed cast caused indirectly by a failed implicit conversion to the type of the constructor parameter. 
Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/docs/tools/clang-formatted-files.txt b/clang/docs/tools/clang-formatted-files.txt index 48ded9c75455..fa40ea74fb7e 100644 --- a/clang/docs/tools/clang-formatted-files.txt +++ b/clang/docs/tools/clang-formatted-files.txt @@ -608,7 +608,6 @@ clang/tools/clang-refactor/ClangRefactor.cpp clang/tools/clang-refactor/TestSupport.cpp clang/tools/clang-refactor/TestSupport.h clang/tools/clang-refactor/ToolRefactoringResultConsumer.h -clang/tools/clang-rename/ClangRename.cpp clang/tools/clang-repl/ClangRepl.cpp clang/tools/clang-scan-deps/ClangScanDeps.cpp clang/tools/clang-shlib/clang-shlib.cpp diff --git a/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h b/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h index b7b84852168e..c7a5b016c949 100644 --- a/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h +++ b/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h @@ -118,10 +118,19 @@ public: static FunctionParmMutationAnalyzer * getFunctionParmMutationAnalyzer(const FunctionDecl &Func, ASTContext &Context, ExprMutationAnalyzer::Memoized &Memorized) { - auto [it, Inserted] = Memorized.FuncParmAnalyzer.try_emplace(&Func); - if (Inserted) - it->second = std::unique_ptr<FunctionParmMutationAnalyzer>( - new FunctionParmMutationAnalyzer(Func, Context, Memorized)); + auto it = Memorized.FuncParmAnalyzer.find(&Func); + if (it == Memorized.FuncParmAnalyzer.end()) { + // Creating a new instance of FunctionParmMutationAnalyzer below may add + // additional elements to FuncParmAnalyzer. If we did try_emplace before + // creating a new instance, the returned iterator of try_emplace could be + // invalidated. 
+ it = + Memorized.FuncParmAnalyzer + .try_emplace(&Func, std::unique_ptr<FunctionParmMutationAnalyzer>( + new FunctionParmMutationAnalyzer( + Func, Context, Memorized))) + .first; + } return it->getSecond().get(); } diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 35b9716e13ff..ce86116680d7 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1258,6 +1258,14 @@ def CoroAwaitElidable : InheritableAttr { let SimpleHandler = 1; } +def CoroAwaitElidableArgument : InheritableAttr { + let Spellings = [Clang<"coro_await_elidable_argument">]; + let Subjects = SubjectList<[ParmVar]>; + let LangOpts = [CPlusPlus]; + let Documentation = [CoroAwaitElidableArgumentDoc]; + let SimpleHandler = 1; +} + // OSObject-based attributes. def OSConsumed : InheritableParamAttr { let Spellings = [Clang<"os_consumed">]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index cc9bc499c9cc..8ef151b3f2fd 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -8258,15 +8258,23 @@ but do not pass them to the underlying coroutine or pass them by value. def CoroAwaitElidableDoc : Documentation { let Category = DocCatDecl; let Content = [{ -The ``[[clang::coro_await_elidable]]`` is a class attribute which can be applied -to a coroutine return type. +The ``[[clang::coro_await_elidable]]`` is a class attribute which can be +applied to a coroutine return type. It provides a hint to the compiler to apply +Heap Allocation Elision more aggressively. -When a coroutine function that returns such a type calls another coroutine function, -the compiler performs heap allocation elision when the call to the coroutine function -is immediately co_awaited as a prvalue. In this case, the coroutine frame for the -callee will be a local variable within the enclosing braces in the caller's stack -frame. 
And the local variable, like other variables in coroutines, may be collected -into the coroutine frame, which may be allocated on the heap. +When a coroutine function returns such a type, a direct call expression therein +that returns a prvalue of a type attributed ``[[clang::coro_await_elidable]]`` +is said to be under a safe elide context if one of the following is true: +- it is the immediate right-hand side operand to a co_await expression. +- it is an argument to a ``[[clang::coro_await_elidable_argument]]`` parameter +or parameter pack of another direct call expression under a safe elide context. + +Do note that the safe elide context applies only to the call expression itself, +and the context does not transitively include any of its subexpressions unless +exceptional rules of ``[[clang::coro_await_elidable_argument]]`` apply. + +The compiler performs heap allocation elision on call expressions under a safe +elide context, if the callee is a coroutine. Example: @@ -8281,8 +8289,63 @@ Example: co_await t; } -The behavior is undefined if the caller coroutine is destroyed earlier than the -callee coroutine. +Such elision replaces the heap allocated activation frame of the callee coroutine +with a local variable within the enclosing braces in the caller's stack frame. +The local variable, like other variables in coroutines, may be collected into the +coroutine frame, which may be allocated on the heap. The behavior is undefined +if the caller coroutine is destroyed earlier than the callee coroutine. + +}]; +} + +def CoroAwaitElidableArgumentDoc : Documentation { + let Category = DocCatDecl; + let Content = [{ + +The ``[[clang::coro_await_elidable_argument]]`` is a function parameter attribute. +It works in conjunction with ``[[clang::coro_await_elidable]]`` to propagate a +safe elide context to a parameter or parameter pack if the function is called +under a safe elide context. 
+ +This is sometimes necessary on utility functions used to compose or modify the +behavior of a callee coroutine. + +Example: + +.. code-block:: c++ + + template <typename T> + class [[clang::coro_await_elidable]] Task { ... }; + + template <typename... T> + class [[clang::coro_await_elidable]] WhenAll { ... }; + + // `when_all` is a utility function that composes coroutines. It does not + // need to be a coroutine to propagate. + template <typename... T> + WhenAll<T...> when_all([[clang::coro_await_elidable_argument]] Task<T> tasks...); + + Task<int> foo(); + Task<int> bar(); + Task<void> example1() { + // `when_all``, `foo``, and `bar` are all elide safe because `when_all` is + // under a safe elide context and, thanks to the [[clang::coro_await_elidable_argument]] + // attribute, such context is propagated to foo and bar. + co_await when_all(foo(), bar()); + } + + Task<void> example2() { + // `when_all` and `bar` are elide safe. `foo` is not elide safe. + auto f = foo(); + co_await when_all(f, bar()); + } + + + Task<void> example3() { + // None of the calls are elide safe. 
+ auto t = when_all(foo(), bar()); + co_await t; + } }]; } diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 6cf03d27055c..8c5d7ad763bf 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -1322,6 +1322,12 @@ def ElementwiseLog10 : Builtin { let Prototype = "void(...)"; } +def ElementwisePopcount : Builtin { + let Spellings = ["__builtin_elementwise_popcount"]; + let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Prototype = "void(...)"; +} + def ElementwisePow : Builtin { let Spellings = ["__builtin_elementwise_pow"]; let Attributes = [NoThrow, Const, CustomTypeChecking]; diff --git a/clang/include/clang/Basic/Diagnostic.h b/clang/include/clang/Basic/Diagnostic.h index 54b69e985402..e17ed8f98afa 100644 --- a/clang/include/clang/Basic/Diagnostic.h +++ b/clang/include/clang/Basic/Diagnostic.h @@ -183,6 +183,41 @@ struct DiagnosticStorage { DiagnosticStorage() = default; }; +/// An allocator for DiagnosticStorage objects, which uses a small cache to +/// objects, used to reduce malloc()/free() traffic for partial diagnostics. +class DiagStorageAllocator { + static const unsigned NumCached = 16; + DiagnosticStorage Cached[NumCached]; + DiagnosticStorage *FreeList[NumCached]; + unsigned NumFreeListEntries; + +public: + DiagStorageAllocator(); + ~DiagStorageAllocator(); + + /// Allocate new storage. + DiagnosticStorage *Allocate() { + if (NumFreeListEntries == 0) + return new DiagnosticStorage; + + DiagnosticStorage *Result = FreeList[--NumFreeListEntries]; + Result->NumDiagArgs = 0; + Result->DiagRanges.clear(); + Result->FixItHints.clear(); + return Result; + } + + /// Free the given storage object. + void Deallocate(DiagnosticStorage *S) { + if (S >= Cached && S <= Cached + NumCached) { + FreeList[NumFreeListEntries++] = S; + return; + } + + delete S; + } +}; + /// Concrete class used by the front-end to report problems and issues. 
/// /// This massages the diagnostics (e.g. handling things like "report warnings @@ -522,27 +557,6 @@ private: void *ArgToStringCookie = nullptr; ArgToStringFnTy ArgToStringFn; - /// ID of the "delayed" diagnostic, which is a (typically - /// fatal) diagnostic that had to be delayed because it was found - /// while emitting another diagnostic. - unsigned DelayedDiagID; - - /// First string argument for the delayed diagnostic. - std::string DelayedDiagArg1; - - /// Second string argument for the delayed diagnostic. - std::string DelayedDiagArg2; - - /// Third string argument for the delayed diagnostic. - std::string DelayedDiagArg3; - - /// Optional flag value. - /// - /// Some flags accept values, for instance: -Wframe-larger-than=<value> and - /// -Rpass=<value>. The content of this string is emitted after the flag name - /// and '='. - std::string FlagValue; - public: explicit DiagnosticsEngine(IntrusiveRefCntPtr<DiagnosticIDs> Diags, IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts, @@ -949,70 +963,18 @@ public: void Report(const StoredDiagnostic &storedDiag); - /// Determine whethere there is already a diagnostic in flight. - bool isDiagnosticInFlight() const { - return CurDiagID != std::numeric_limits<unsigned>::max(); - } - - /// Set the "delayed" diagnostic that will be emitted once - /// the current diagnostic completes. - /// - /// If a diagnostic is already in-flight but the front end must - /// report a problem (e.g., with an inconsistent file system - /// state), this routine sets a "delayed" diagnostic that will be - /// emitted after the current diagnostic completes. This should - /// only be used for fatal errors detected at inconvenient - /// times. If emitting a delayed diagnostic causes a second delayed - /// diagnostic to be introduced, that second delayed diagnostic - /// will be ignored. - /// - /// \param DiagID The ID of the diagnostic being delayed. - /// - /// \param Arg1 A string argument that will be provided to the - /// diagnostic. 
A copy of this string will be stored in the - /// DiagnosticsEngine object itself. - /// - /// \param Arg2 A string argument that will be provided to the - /// diagnostic. A copy of this string will be stored in the - /// DiagnosticsEngine object itself. - /// - /// \param Arg3 A string argument that will be provided to the - /// diagnostic. A copy of this string will be stored in the - /// DiagnosticsEngine object itself. - void SetDelayedDiagnostic(unsigned DiagID, StringRef Arg1 = "", - StringRef Arg2 = "", StringRef Arg3 = ""); - - /// Clear out the current diagnostic. - void Clear() { CurDiagID = std::numeric_limits<unsigned>::max(); } - - /// Return the value associated with this diagnostic flag. - StringRef getFlagValue() const { return FlagValue; } - private: // This is private state used by DiagnosticBuilder. We put it here instead of // in DiagnosticBuilder in order to keep DiagnosticBuilder a small lightweight - // object. This implementation choice means that we can only have one - // diagnostic "in flight" at a time, but this seems to be a reasonable - // tradeoff to keep these objects small. Assertions verify that only one - // diagnostic is in flight at a time. + // object. This implementation choice means that we can only have a few + // diagnostics "in flight" at a time, but this seems to be a reasonable + // tradeoff to keep these objects small. friend class Diagnostic; friend class DiagnosticBuilder; friend class DiagnosticErrorTrap; friend class DiagnosticIDs; friend class PartialDiagnostic; - /// Report the delayed diagnostic. - void ReportDelayed(); - - /// The location of the current diagnostic that is in flight. - SourceLocation CurDiagLoc; - - /// The ID of the current diagnostic that is in flight. - /// - /// This is set to std::numeric_limits<unsigned>::max() when there is no - /// diagnostic in flight. - unsigned CurDiagID; - enum { /// The maximum number of arguments we can hold. 
/// @@ -1022,7 +984,7 @@ private: MaxArguments = DiagnosticStorage::MaxArguments, }; - DiagnosticStorage DiagStorage; + DiagStorageAllocator DiagAllocator; DiagnosticMapping makeUserMapping(diag::Severity Map, SourceLocation L) { bool isPragma = L.isValid(); @@ -1042,8 +1004,8 @@ private: /// Used to report a diagnostic that is finally fully formed. /// /// \returns true if the diagnostic was emitted, false if it was suppressed. - bool ProcessDiag() { - return Diags->ProcessDiag(*this); + bool ProcessDiag(const DiagnosticBuilder &DiagBuilder) { + return Diags->ProcessDiag(*this, DiagBuilder); } /// @name Diagnostic Emission @@ -1058,14 +1020,10 @@ protected: // Sema::Diag() patterns. friend class Sema; - /// Emit the current diagnostic and clear the diagnostic state. + /// Emit the diagnostic /// /// \param Force Emit the diagnostic regardless of suppression settings. - bool EmitCurrentDiagnostic(bool Force = false); - - unsigned getCurrentDiagID() const { return CurDiagID; } - - SourceLocation getCurrentDiagLoc() const { return CurDiagLoc; } + bool EmitDiagnostic(const DiagnosticBuilder &DB, bool Force = false); /// @} }; @@ -1118,40 +1076,7 @@ public: /// class StreamingDiagnostic { public: - /// An allocator for DiagnosticStorage objects, which uses a small cache to - /// objects, used to reduce malloc()/free() traffic for partial diagnostics. - class DiagStorageAllocator { - static const unsigned NumCached = 16; - DiagnosticStorage Cached[NumCached]; - DiagnosticStorage *FreeList[NumCached]; - unsigned NumFreeListEntries; - - public: - DiagStorageAllocator(); - ~DiagStorageAllocator(); - - /// Allocate new storage. - DiagnosticStorage *Allocate() { - if (NumFreeListEntries == 0) - return new DiagnosticStorage; - - DiagnosticStorage *Result = FreeList[--NumFreeListEntries]; - Result->NumDiagArgs = 0; - Result->DiagRanges.clear(); - Result->FixItHints.clear(); - return Result; - } - - /// Free the given storage object. 
- void Deallocate(DiagnosticStorage *S) { - if (S >= Cached && S <= Cached + NumCached) { - FreeList[NumFreeListEntries++] = S; - return; - } - - delete S; - } - }; + using DiagStorageAllocator = clang::DiagStorageAllocator; protected: mutable DiagnosticStorage *DiagStorage = nullptr; @@ -1240,11 +1165,6 @@ public: protected: StreamingDiagnostic() = default; - /// Construct with an external storage not owned by itself. The allocator - /// is a null pointer in this case. - explicit StreamingDiagnostic(DiagnosticStorage *Storage) - : DiagStorage(Storage) {} - /// Construct with a storage allocator which will manage the storage. The /// allocator is not a null pointer in this case. explicit StreamingDiagnostic(DiagStorageAllocator &Alloc) @@ -1275,9 +1195,20 @@ protected: class DiagnosticBuilder : public StreamingDiagnostic { friend class DiagnosticsEngine; friend class PartialDiagnostic; + friend class Diagnostic; mutable DiagnosticsEngine *DiagObj = nullptr; + SourceLocation DiagLoc; + unsigned DiagID; + + /// Optional flag value. + /// + /// Some flags accept values, for instance: -Wframe-larger-than=<value> and + /// -Rpass=<value>. The content of this string is emitted after the flag name + /// and '='. + mutable std::string FlagValue; + /// Status variable indicating if this diagnostic is still active. 
/// // NOTE: This field is redundant with DiagObj (IsActive iff (DiagObj == 0)), @@ -1291,16 +1222,8 @@ class DiagnosticBuilder : public StreamingDiagnostic { DiagnosticBuilder() = default; - explicit DiagnosticBuilder(DiagnosticsEngine *diagObj) - : StreamingDiagnostic(&diagObj->DiagStorage), DiagObj(diagObj), - IsActive(true) { - assert(diagObj && "DiagnosticBuilder requires a valid DiagnosticsEngine!"); - assert(DiagStorage && - "DiagnosticBuilder requires a valid DiagnosticStorage!"); - DiagStorage->NumDiagArgs = 0; - DiagStorage->DiagRanges.clear(); - DiagStorage->FixItHints.clear(); - } + DiagnosticBuilder(DiagnosticsEngine *DiagObj, SourceLocation DiagLoc, + unsigned DiagID); protected: /// Clear out the current diagnostic. @@ -1326,7 +1249,7 @@ protected: if (!isActive()) return false; // Process the diagnostic. - bool Result = DiagObj->EmitCurrentDiagnostic(IsForceEmit); + bool Result = DiagObj->EmitDiagnostic(*this, IsForceEmit); // This diagnostic is dead. Clear(); @@ -1337,13 +1260,7 @@ protected: public: /// Copy constructor. When copied, this "takes" the diagnostic info from the /// input and neuters it. 
- DiagnosticBuilder(const DiagnosticBuilder &D) : StreamingDiagnostic() { - DiagObj = D.DiagObj; - DiagStorage = D.DiagStorage; - IsActive = D.IsActive; - IsForceEmit = D.IsForceEmit; - D.Clear(); - } + DiagnosticBuilder(const DiagnosticBuilder &D); template <typename T> const DiagnosticBuilder &operator<<(const T &V) const { assert(isActive() && "Clients must not add to cleared diagnostic!"); @@ -1375,7 +1292,7 @@ public: return *this; } - void addFlagValue(StringRef V) const { DiagObj->FlagValue = std::string(V); } + void addFlagValue(StringRef V) const { FlagValue = std::string(V); } }; struct AddFlagValue { @@ -1550,12 +1467,7 @@ const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB, inline DiagnosticBuilder DiagnosticsEngine::Report(SourceLocation Loc, unsigned DiagID) { - assert(CurDiagID == std::numeric_limits<unsigned>::max() && - "Multiple diagnostics in flight at once!"); - CurDiagLoc = Loc; - CurDiagID = DiagID; - FlagValue.clear(); - return DiagnosticBuilder(this); + return DiagnosticBuilder(this, Loc, DiagID); } const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB, @@ -1570,24 +1482,29 @@ inline DiagnosticBuilder DiagnosticsEngine::Report(unsigned DiagID) { //===----------------------------------------------------------------------===// /// A little helper class (which is basically a smart pointer that forwards -/// info from DiagnosticsEngine) that allows clients to enquire about the -/// currently in-flight diagnostic. +/// info from DiagnosticsEngine and DiagnosticStorage) that allows clients to +/// enquire about the diagnostic. 
class Diagnostic { const DiagnosticsEngine *DiagObj; + SourceLocation DiagLoc; + unsigned DiagID; + std::string FlagValue; + const DiagnosticStorage &DiagStorage; std::optional<StringRef> StoredDiagMessage; public: - explicit Diagnostic(const DiagnosticsEngine *DO) : DiagObj(DO) {} - Diagnostic(const DiagnosticsEngine *DO, StringRef storedDiagMessage) - : DiagObj(DO), StoredDiagMessage(storedDiagMessage) {} + Diagnostic(const DiagnosticsEngine *DO, const DiagnosticBuilder &DiagBuilder); + Diagnostic(const DiagnosticsEngine *DO, SourceLocation DiagLoc, + unsigned DiagID, const DiagnosticStorage &DiagStorage, + StringRef StoredDiagMessage); const DiagnosticsEngine *getDiags() const { return DiagObj; } - unsigned getID() const { return DiagObj->CurDiagID; } - const SourceLocation &getLocation() const { return DiagObj->CurDiagLoc; } + unsigned getID() const { return DiagID; } + const SourceLocation &getLocation() const { return DiagLoc; } bool hasSourceManager() const { return DiagObj->hasSourceManager(); } SourceManager &getSourceManager() const { return DiagObj->getSourceManager();} - unsigned getNumArgs() const { return DiagObj->DiagStorage.NumDiagArgs; } + unsigned getNumArgs() const { return DiagStorage.NumDiagArgs; } /// Return the kind of the specified index. /// @@ -1597,8 +1514,7 @@ public: /// \pre Idx < getNumArgs() DiagnosticsEngine::ArgumentKind getArgKind(unsigned Idx) const { assert(Idx < getNumArgs() && "Argument index out of range!"); - return (DiagnosticsEngine::ArgumentKind) - DiagObj->DiagStorage.DiagArgumentsKind[Idx]; + return (DiagnosticsEngine::ArgumentKind)DiagStorage.DiagArgumentsKind[Idx]; } /// Return the provided argument string specified by \p Idx. 
@@ -1606,7 +1522,7 @@ public: const std::string &getArgStdStr(unsigned Idx) const { assert(getArgKind(Idx) == DiagnosticsEngine::ak_std_string && "invalid argument accessor!"); - return DiagObj->DiagStorage.DiagArgumentsStr[Idx]; + return DiagStorage.DiagArgumentsStr[Idx]; } /// Return the specified C string argument. @@ -1614,8 +1530,7 @@ public: const char *getArgCStr(unsigned Idx) const { assert(getArgKind(Idx) == DiagnosticsEngine::ak_c_string && "invalid argument accessor!"); - return reinterpret_cast<const char *>( - DiagObj->DiagStorage.DiagArgumentsVal[Idx]); + return reinterpret_cast<const char *>(DiagStorage.DiagArgumentsVal[Idx]); } /// Return the specified signed integer argument. @@ -1623,7 +1538,7 @@ public: int64_t getArgSInt(unsigned Idx) const { assert(getArgKind(Idx) == DiagnosticsEngine::ak_sint && "invalid argument accessor!"); - return (int64_t)DiagObj->DiagStorage.DiagArgumentsVal[Idx]; + return (int64_t)DiagStorage.DiagArgumentsVal[Idx]; } /// Return the specified unsigned integer argument. @@ -1631,7 +1546,7 @@ public: uint64_t getArgUInt(unsigned Idx) const { assert(getArgKind(Idx) == DiagnosticsEngine::ak_uint && "invalid argument accessor!"); - return DiagObj->DiagStorage.DiagArgumentsVal[Idx]; + return DiagStorage.DiagArgumentsVal[Idx]; } /// Return the specified IdentifierInfo argument. @@ -1640,7 +1555,7 @@ public: assert(getArgKind(Idx) == DiagnosticsEngine::ak_identifierinfo && "invalid argument accessor!"); return reinterpret_cast<IdentifierInfo *>( - DiagObj->DiagStorage.DiagArgumentsVal[Idx]); + DiagStorage.DiagArgumentsVal[Idx]); } /// Return the specified non-string argument in an opaque form. @@ -1648,37 +1563,32 @@ public: uint64_t getRawArg(unsigned Idx) const { assert(getArgKind(Idx) != DiagnosticsEngine::ak_std_string && "invalid argument accessor!"); - return DiagObj->DiagStorage.DiagArgumentsVal[Idx]; + return DiagStorage.DiagArgumentsVal[Idx]; } /// Return the number of source ranges associated with this diagnostic. 
- unsigned getNumRanges() const { - return DiagObj->DiagStorage.DiagRanges.size(); - } + unsigned getNumRanges() const { return DiagStorage.DiagRanges.size(); } /// \pre Idx < getNumRanges() const CharSourceRange &getRange(unsigned Idx) const { assert(Idx < getNumRanges() && "Invalid diagnostic range index!"); - return DiagObj->DiagStorage.DiagRanges[Idx]; + return DiagStorage.DiagRanges[Idx]; } /// Return an array reference for this diagnostic's ranges. - ArrayRef<CharSourceRange> getRanges() const { - return DiagObj->DiagStorage.DiagRanges; - } + ArrayRef<CharSourceRange> getRanges() const { return DiagStorage.DiagRanges; } - unsigned getNumFixItHints() const { - return DiagObj->DiagStorage.FixItHints.size(); - } + unsigned getNumFixItHints() const { return DiagStorage.FixItHints.size(); } const FixItHint &getFixItHint(unsigned Idx) const { assert(Idx < getNumFixItHints() && "Invalid index!"); - return DiagObj->DiagStorage.FixItHints[Idx]; + return DiagStorage.FixItHints[Idx]; } - ArrayRef<FixItHint> getFixItHints() const { - return DiagObj->DiagStorage.FixItHints; - } + ArrayRef<FixItHint> getFixItHints() const { return DiagStorage.FixItHints; } + + /// Return the value associated with this diagnostic flag. + StringRef getFlagValue() const { return FlagValue; } /// Format this diagnostic into a string, substituting the /// formal arguments into the %0 slots. diff --git a/clang/include/clang/Basic/DiagnosticIDs.h b/clang/include/clang/Basic/DiagnosticIDs.h index daad66f49953..1fa38ed6066e 100644 --- a/clang/include/clang/Basic/DiagnosticIDs.h +++ b/clang/include/clang/Basic/DiagnosticIDs.h @@ -24,6 +24,7 @@ namespace clang { class DiagnosticsEngine; + class DiagnosticBuilder; class SourceLocation; // Import the diagnostic enums themselves. @@ -486,11 +487,13 @@ private: /// /// \returns \c true if the diagnostic was emitted, \c false if it was /// suppressed. 
- bool ProcessDiag(DiagnosticsEngine &Diag) const; + bool ProcessDiag(DiagnosticsEngine &Diag, + const DiagnosticBuilder &DiagBuilder) const; /// Used to emit a diagnostic that is finally fully formed, /// ignoring suppression. - void EmitDiag(DiagnosticsEngine &Diag, Level DiagLevel) const; + void EmitDiag(DiagnosticsEngine &Diag, const DiagnosticBuilder &DiagBuilder, + Level DiagLevel) const; /// Whether the diagnostic may leave the AST in a state where some /// invariants can break. diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 1afadb3bff75..78510e61a639 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -113,9 +113,12 @@ def ext_cxx11_enum_fixed_underlying_type : Extension< def ext_ms_c_enum_fixed_underlying_type : Extension< "enumeration types with a fixed underlying type are a Microsoft extension">, InGroup<MicrosoftFixedEnum>; -def ext_clang_c_enum_fixed_underlying_type : Extension< - "enumeration types with a fixed underlying type are a Clang extension">, - InGroup<DiagGroup<"fixed-enum-extension">>; +def ext_c23_enum_fixed_underlying_type : Extension< + "enumeration types with a fixed underlying type are a C23 extension">, + InGroup<C23>; +def warn_c17_compat_enum_fixed_underlying_type : Warning< + "enumeration types with a fixed underlying type are incompatible with C standards before C23">, + DefaultIgnore, InGroup<CPre23Compat>; def warn_cxx98_compat_enum_fixed_underlying_type : Warning< "enumeration types with a fixed underlying type are incompatible with C++98">, InGroup<CXX98Compat>, DefaultIgnore; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index e8b64f3c5a01..ba813af960af 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3290,7 +3290,7 @@ def 
err_attribute_unsupported_m_profile def err_duplicate_target_attribute : Error<"%select{unsupported|duplicate|unknown}0%select{| CPU|" " tune CPU}1 '%2' in the '%select{target|target_clones|target_version}3' " - "attribute string; ">; + "attribute string;">; // The err_*_attribute_argument_not_int are separate because they're used by // VerifyIntegerConstantExpression. def err_aligned_attribute_argument_not_int : Error< @@ -9915,7 +9915,7 @@ def err_defaulted_comparison_constexpr_mismatch : Error< "three-way comparison operator}0 cannot be " "declared %select{constexpr|consteval}2 because " "%select{it|for which the corresponding implicit 'operator==' }0 " - "invokes a non-constexpr comparison function ">; + "invokes a non-constexpr comparison function">; def note_defaulted_comparison_not_constexpr : Note< "non-constexpr comparison function would be used to compare " "%select{|member %1|base class %1}0">; @@ -11559,7 +11559,7 @@ def err_omp_wrong_device_function_call : Error< "function with 'device_type(%0)' is not available on %select{device|host}1">; def note_omp_marked_device_type_here : Note<"marked as 'device_type(%0)' here">; def err_omp_declare_target_has_local_vars : Error< - "local variable '%0' should not be used in 'declare target' directive; ">; + "local variable '%0' should not be used in 'declare target' directive;">; def warn_omp_declare_target_after_first_use : Warning< "declaration marked as declare target after first use, it may lead to incorrect results">, InGroup<OpenMPTarget>; @@ -12381,6 +12381,7 @@ def warn_hlsl_deprecated_register_type_b: Warning<"binding type 'b' only applies def warn_hlsl_deprecated_register_type_i: Warning<"binding type 'i' ignored. 
The 'integer constant' binding type is no longer supported">, InGroup<LegacyConstantRegisterBinding>, DefaultError; def err_hlsl_unsupported_register_number : Error<"register number should be an integer">; def err_hlsl_expected_space : Error<"invalid space specifier '%0' used; expected 'space' followed by an integer, like space1">; +def err_hlsl_space_on_global_constant : Error<"register space cannot be specified on global constants">; def warn_hlsl_packoffset_mix : Warning<"cannot mix packoffset elements with nonpackoffset elements in a cbuffer">, InGroup<HLSLMixPackOffset>; def err_hlsl_packoffset_overlap : Error<"packoffset overlap between %0, %1">; diff --git a/clang/include/clang/Basic/PartialDiagnostic.h b/clang/include/clang/Basic/PartialDiagnostic.h index 507d789c54ff..4bf6049d08fd 100644 --- a/clang/include/clang/Basic/PartialDiagnostic.h +++ b/clang/include/clang/Basic/PartialDiagnostic.h @@ -166,13 +166,10 @@ public: void EmitToString(DiagnosticsEngine &Diags, SmallVectorImpl<char> &Buf) const { - // FIXME: It should be possible to render a diagnostic to a string without - // messing with the state of the diagnostics engine. DiagnosticBuilder DB(Diags.Report(getDiagID())); Emit(DB); - Diagnostic(&Diags).FormatDiagnostic(Buf); + Diagnostic(&Diags, DB).FormatDiagnostic(Buf); DB.Clear(); - Diags.Clear(); } /// Clear out this partial diagnostic, giving it a new diagnostic ID diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 0809ac1b144e..e1c3a99cfa16 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -626,10 +626,10 @@ public: const llvm::MapVector<FieldDecl *, DeleteLocs> & getMismatchingDeleteExpressions() const; - /// Cause the active diagnostic on the DiagosticsEngine to be - /// emitted. This is closely coupled to the SemaDiagnosticBuilder class and + /// Cause the built diagnostic to be emitted on the DiagosticsEngine. 
+ /// This is closely coupled to the SemaDiagnosticBuilder class and /// should not be used elsewhere. - void EmitCurrentDiagnostic(unsigned DiagID); + void EmitDiagnostic(unsigned DiagID, const DiagnosticBuilder &DB); void addImplicitTypedef(StringRef Name, QualType T); diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 67841a30a571..ebd4a41ee636 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -3377,7 +3377,8 @@ static void encodeTypeForFunctionPointerAuth(const ASTContext &Ctx, #include "clang/Basic/HLSLIntangibleTypes.def" case BuiltinType::Dependent: llvm_unreachable("should never get here"); - case BuiltinType::AMDGPUBufferRsrc: +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" case BuiltinType::WasmExternRef: #define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/RISCVVTypes.def" diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp index ecff80a50906..0bd6845085b7 100644 --- a/clang/lib/Basic/Diagnostic.cpp +++ b/clang/lib/Basic/Diagnostic.cpp @@ -126,9 +126,7 @@ void DiagnosticsEngine::Reset(bool soft /*=false*/) { TrapNumErrorsOccurred = 0; TrapNumUnrecoverableErrorsOccurred = 0; - CurDiagID = std::numeric_limits<unsigned>::max(); LastDiagLevel = DiagnosticIDs::Ignored; - DelayedDiagID = 0; if (!soft) { // Clear state related to #pragma diagnostic. 
@@ -143,23 +141,6 @@ void DiagnosticsEngine::Reset(bool soft /*=false*/) { } } -void DiagnosticsEngine::SetDelayedDiagnostic(unsigned DiagID, StringRef Arg1, - StringRef Arg2, StringRef Arg3) { - if (DelayedDiagID) - return; - - DelayedDiagID = DiagID; - DelayedDiagArg1 = Arg1.str(); - DelayedDiagArg2 = Arg2.str(); - DelayedDiagArg3 = Arg3.str(); -} - -void DiagnosticsEngine::ReportDelayed() { - unsigned ID = DelayedDiagID; - DelayedDiagID = 0; - Report(ID) << DelayedDiagArg1 << DelayedDiagArg2 << DelayedDiagArg3; -} - DiagnosticMapping & DiagnosticsEngine::DiagState::getOrAddMapping(diag::kind Diag) { std::pair<iterator, bool> Result = @@ -503,39 +484,31 @@ void DiagnosticsEngine::setSeverityForAll(diag::Flavor Flavor, } void DiagnosticsEngine::Report(const StoredDiagnostic &storedDiag) { - assert(CurDiagID == std::numeric_limits<unsigned>::max() && - "Multiple diagnostics in flight at once!"); - - CurDiagLoc = storedDiag.getLocation(); - CurDiagID = storedDiag.getID(); - DiagStorage.NumDiagArgs = 0; - - DiagStorage.DiagRanges.clear(); + DiagnosticStorage DiagStorage; DiagStorage.DiagRanges.append(storedDiag.range_begin(), storedDiag.range_end()); - DiagStorage.FixItHints.clear(); DiagStorage.FixItHints.append(storedDiag.fixit_begin(), storedDiag.fixit_end()); assert(Client && "DiagnosticConsumer not set!"); Level DiagLevel = storedDiag.getLevel(); - Diagnostic Info(this, storedDiag.getMessage()); + Diagnostic Info(this, storedDiag.getLocation(), storedDiag.getID(), + DiagStorage, storedDiag.getMessage()); Client->HandleDiagnostic(DiagLevel, Info); if (Client->IncludeInDiagnosticCounts()) { if (DiagLevel == DiagnosticsEngine::Warning) ++NumWarnings; } - - CurDiagID = std::numeric_limits<unsigned>::max(); } -bool DiagnosticsEngine::EmitCurrentDiagnostic(bool Force) { +bool DiagnosticsEngine::EmitDiagnostic(const DiagnosticBuilder &DB, + bool Force) { assert(getClient() && "DiagnosticClient not set!"); bool Emitted; if (Force) { - Diagnostic Info(this); + Diagnostic 
Info(this, DB); // Figure out the diagnostic level of this message. DiagnosticIDs::Level DiagLevel @@ -544,24 +517,50 @@ bool DiagnosticsEngine::EmitCurrentDiagnostic(bool Force) { Emitted = (DiagLevel != DiagnosticIDs::Ignored); if (Emitted) { // Emit the diagnostic regardless of suppression level. - Diags->EmitDiag(*this, DiagLevel); + Diags->EmitDiag(*this, DB, DiagLevel); } } else { // Process the diagnostic, sending the accumulated information to the // DiagnosticConsumer. - Emitted = ProcessDiag(); + Emitted = ProcessDiag(DB); } - // Clear out the current diagnostic object. - Clear(); + return Emitted; +} + +DiagnosticBuilder::DiagnosticBuilder(DiagnosticsEngine *DiagObj, + SourceLocation DiagLoc, unsigned DiagID) + : StreamingDiagnostic(DiagObj->DiagAllocator), DiagObj(DiagObj), + DiagLoc(DiagLoc), DiagID(DiagID), IsActive(true) { + assert(DiagObj && "DiagnosticBuilder requires a valid DiagnosticsEngine!"); +} - // If there was a delayed diagnostic, emit it now. - if (!Force && DelayedDiagID) - ReportDelayed(); +DiagnosticBuilder::DiagnosticBuilder(const DiagnosticBuilder &D) + : StreamingDiagnostic() { + DiagLoc = D.DiagLoc; + DiagID = D.DiagID; + FlagValue = D.FlagValue; + DiagObj = D.DiagObj; + DiagStorage = D.DiagStorage; + D.DiagStorage = nullptr; + Allocator = D.Allocator; + IsActive = D.IsActive; + IsForceEmit = D.IsForceEmit; + D.Clear(); +} - return Emitted; +Diagnostic::Diagnostic(const DiagnosticsEngine *DO, + const DiagnosticBuilder &DiagBuilder) + : DiagObj(DO), DiagLoc(DiagBuilder.DiagLoc), DiagID(DiagBuilder.DiagID), + FlagValue(DiagBuilder.FlagValue), DiagStorage(*DiagBuilder.getStorage()) { } +Diagnostic::Diagnostic(const DiagnosticsEngine *DO, SourceLocation DiagLoc, + unsigned DiagID, const DiagnosticStorage &DiagStorage, + StringRef StoredDiagMessage) + : DiagObj(DO), DiagLoc(DiagLoc), DiagID(DiagID), DiagStorage(DiagStorage), + StoredDiagMessage(StoredDiagMessage) {} + DiagnosticConsumer::~DiagnosticConsumer() = default; void 
DiagnosticConsumer::HandleDiagnostic(DiagnosticsEngine::Level DiagLevel, @@ -1216,13 +1215,13 @@ bool ForwardingDiagnosticConsumer::IncludeInDiagnosticCounts() const { return Target.IncludeInDiagnosticCounts(); } -PartialDiagnostic::DiagStorageAllocator::DiagStorageAllocator() { +DiagStorageAllocator::DiagStorageAllocator() { for (unsigned I = 0; I != NumCached; ++I) FreeList[I] = Cached + I; NumFreeListEntries = NumCached; } -PartialDiagnostic::DiagStorageAllocator::~DiagStorageAllocator() { +DiagStorageAllocator::~DiagStorageAllocator() { // Don't assert if we are in a CrashRecovery context, as this invariant may // be invalidated during a crash. assert((NumFreeListEntries == NumCached || diff --git a/clang/lib/Basic/DiagnosticIDs.cpp b/clang/lib/Basic/DiagnosticIDs.cpp index cae6642bd4bd..031d9d7817d1 100644 --- a/clang/lib/Basic/DiagnosticIDs.cpp +++ b/clang/lib/Basic/DiagnosticIDs.cpp @@ -566,7 +566,7 @@ DiagnosticIDs::getDiagnosticSeverity(unsigned DiagID, SourceLocation Loc, // If explicitly requested, map fatal errors to errors. if (Result == diag::Severity::Fatal && - Diag.CurDiagID != diag::fatal_too_many_errors && Diag.FatalsAsError) + DiagID != diag::fatal_too_many_errors && Diag.FatalsAsError) Result = diag::Severity::Error; bool ShowInSystemHeader = @@ -800,8 +800,9 @@ StringRef DiagnosticIDs::getNearestOption(diag::Flavor Flavor, /// ProcessDiag - This is the method used to report a diagnostic that is /// finally fully formed. -bool DiagnosticIDs::ProcessDiag(DiagnosticsEngine &Diag) const { - Diagnostic Info(&Diag); +bool DiagnosticIDs::ProcessDiag(DiagnosticsEngine &Diag, + const DiagnosticBuilder &DiagBuilder) const { + Diagnostic Info(&Diag, DiagBuilder); assert(Diag.getClient() && "DiagnosticClient not set!"); @@ -867,22 +868,24 @@ bool DiagnosticIDs::ProcessDiag(DiagnosticsEngine &Diag) const { // stop a flood of bogus errors. 
if (Diag.ErrorLimit && Diag.NumErrors > Diag.ErrorLimit && DiagLevel == DiagnosticIDs::Error) { - Diag.SetDelayedDiagnostic(diag::fatal_too_many_errors); + Diag.Report(diag::fatal_too_many_errors); return false; } } // Make sure we set FatalErrorOccurred to ensure that the notes from the // diagnostic that caused `fatal_too_many_errors` won't be emitted. - if (Diag.CurDiagID == diag::fatal_too_many_errors) + if (Info.getID() == diag::fatal_too_many_errors) Diag.FatalErrorOccurred = true; // Finally, report it. - EmitDiag(Diag, DiagLevel); + EmitDiag(Diag, DiagBuilder, DiagLevel); return true; } -void DiagnosticIDs::EmitDiag(DiagnosticsEngine &Diag, Level DiagLevel) const { - Diagnostic Info(&Diag); +void DiagnosticIDs::EmitDiag(DiagnosticsEngine &Diag, + const DiagnosticBuilder &DiagBuilder, + Level DiagLevel) const { + Diagnostic Info(&Diag, DiagBuilder); assert(DiagLevel != DiagnosticIDs::Ignored && "Cannot emit ignored diagnostics!"); Diag.Client->HandleDiagnostic((DiagnosticsEngine::Level)DiagLevel, Info); @@ -890,8 +893,6 @@ void DiagnosticIDs::EmitDiag(DiagnosticsEngine &Diag, Level DiagLevel) const { if (DiagLevel == DiagnosticIDs::Warning) ++Diag.NumWarnings; } - - Diag.CurDiagID = ~0U; } bool DiagnosticIDs::isUnrecoverable(unsigned DiagID) const { diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index d6ec26af80aa..65a8a7253e05 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -130,13 +130,8 @@ ContentCache::getBufferOrNone(DiagnosticsEngine &Diag, FileManager &FM, // the file could also have been removed during processing. Since we can't // really deal with this situation, just create an empty buffer. 
if (!BufferOrError) { - if (Diag.isDiagnosticInFlight()) - Diag.SetDelayedDiagnostic(diag::err_cannot_open_file, - ContentsEntry->getName(), - BufferOrError.getError().message()); - else - Diag.Report(Loc, diag::err_cannot_open_file) - << ContentsEntry->getName() << BufferOrError.getError().message(); + Diag.Report(Loc, diag::err_cannot_open_file) + << ContentsEntry->getName() << BufferOrError.getError().message(); return std::nullopt; } @@ -153,12 +148,7 @@ ContentCache::getBufferOrNone(DiagnosticsEngine &Diag, FileManager &FM, // ContentsEntry::getSize() could have the wrong size. Use // MemoryBuffer::getBufferSize() instead. if (Buffer->getBufferSize() >= std::numeric_limits<unsigned>::max()) { - if (Diag.isDiagnosticInFlight()) - Diag.SetDelayedDiagnostic(diag::err_file_too_large, - ContentsEntry->getName()); - else - Diag.Report(Loc, diag::err_file_too_large) - << ContentsEntry->getName(); + Diag.Report(Loc, diag::err_file_too_large) << ContentsEntry->getName(); return std::nullopt; } @@ -168,12 +158,7 @@ ContentCache::getBufferOrNone(DiagnosticsEngine &Diag, FileManager &FM, // have come from a stat cache). 
if (!ContentsEntry->isNamedPipe() && Buffer->getBufferSize() != (size_t)ContentsEntry->getSize()) { - if (Diag.isDiagnosticInFlight()) - Diag.SetDelayedDiagnostic(diag::err_file_modified, - ContentsEntry->getName()); - else - Diag.Report(Loc, diag::err_file_modified) - << ContentsEntry->getName(); + Diag.Report(Loc, diag::err_file_modified) << ContentsEntry->getName(); return std::nullopt; } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a52e880a7642..7e18aafcdd4b 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3834,6 +3834,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_elementwise_floor: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::floor, "elt.floor")); + case Builtin::BI__builtin_elementwise_popcount: + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::ctpop, "elt.ctpop")); case Builtin::BI__builtin_elementwise_roundeven: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::roundeven, "elt.roundeven")); diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index bec0a29e34fc..59d8fc830dcc 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -338,6 +338,13 @@ void clang::CodeGen::CGHLSLRuntime::setHLSLEntryAttributes( NumThreadsAttr->getZ()); Fn->addFnAttr(NumThreadsKindStr, NumThreadsStr); } + if (HLSLWaveSizeAttr *WaveSizeAttr = FD->getAttr<HLSLWaveSizeAttr>()) { + const StringRef WaveSizeKindStr = "hlsl.wavesize"; + std::string WaveSizeStr = + formatv("{0},{1},{2}", WaveSizeAttr->getMin(), WaveSizeAttr->getMax(), + WaveSizeAttr->getPreferred()); + Fn->addFnAttr(WaveSizeKindStr, WaveSizeStr); + } Fn->addFnAttr(llvm::Attribute::NoInline); } diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 
74f70573c5fe..2c85d21ebd73 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -648,8 +648,6 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, Args.MakeArgString("-plugin-opt=-mattr=" + llvm::join(Features, ","))); } - addGPULibraries(getToolChain(), Args, CmdArgs); - CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); C.addCommand(std::make_unique<Command>( @@ -1089,4 +1087,4 @@ bool AMDGPUToolChain::shouldSkipSanitizeOption( return true; } return false; -}
\ No newline at end of file +} diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 494883500342..c00df5f5bc72 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -9223,6 +9223,25 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, A->claim(); } + // Pass in the C library for GPUs if present and not disabled. + if (!Args.hasArg(options::OPT_nostdlib, options::OPT_r, options::OPT_nogpulib, + options::OPT_nodefaultlibs, options::OPT_nolibc, + options::OPT_nogpulibc)) { + forAllAssociatedToolChains(C, JA, getToolChain(), [&](const ToolChain &TC) { + // The device C library is only available for NVPTX and AMDGPU targets + // currently. + if (!TC.getTriple().isNVPTX() && !TC.getTriple().isAMDGPU()) + return; + bool HasLibC = TC.getStdlibIncludePath().has_value(); + if (HasLibC) { + CmdArgs.push_back(Args.MakeArgString( + "--device-linker=" + TC.getTripleString() + "=" + "-lc")); + CmdArgs.push_back(Args.MakeArgString( + "--device-linker=" + TC.getTripleString() + "=" + "-lm")); + } + }); + } + // If we disable the GPU C library support it needs to be forwarded to the // link job. 
if (!Args.hasFlag(options::OPT_gpulibc, options::OPT_nogpulibc, true)) diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 502aba2ce4aa..043d9e487644 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -510,22 +510,6 @@ void tools::addLinkerCompressDebugSectionsOption( } } -void tools::addGPULibraries(const ToolChain &TC, const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CmdArgs) { - if (Args.hasArg(options::OPT_nostdlib, options::OPT_r, - options::OPT_nodefaultlibs, options::OPT_nolibc, - options::OPT_nogpulibc)) - return; - - // If the user's toolchain has the 'include/<triple>/` path, we assume it - // supports the standard C libraries for the GPU and include them. - bool HasLibC = TC.getStdlibIncludePath().has_value(); - if (HasLibC) { - CmdArgs.push_back("-lc"); - CmdArgs.push_back("-lm"); - } -} - void tools::AddTargetFeature(const ArgList &Args, std::vector<StringRef> &Features, OptSpecifier OnOpt, OptSpecifier OffOpt, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h index 0c97398dfcfa..8695d3fe5b55 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.h +++ b/clang/lib/Driver/ToolChains/CommonArgs.h @@ -35,9 +35,6 @@ void addLinkerCompressDebugSectionsOption(const ToolChain &TC, const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs); -void addGPULibraries(const ToolChain &TC, const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CmdArgs); - void claimNoWarnArgs(const llvm::opt::ArgList &Args); bool addSanitizerRuntimes(const ToolChain &TC, const llvm::opt::ArgList &Args, diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index ef44ffa5594d..509cd87b28c3 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -635,8 +635,6 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, for 
(StringRef Feature : Features) CmdArgs.append({"--feature", Args.MakeArgString(Feature)}); - addGPULibraries(getToolChain(), Args, CmdArgs); - // Add paths for the default clang library path. SmallString<256> DefaultLibPath = llvm::sys::path::parent_path(TC.getDriver().Dir); diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index e21b5a882b77..63949b2e26bd 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -100,6 +100,13 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() { if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) FirstInLineIndex = Tokens.size() - 1; } while (Tokens.back()->isNot(tok::eof)); + if (Style.InsertNewlineAtEOF) { + auto &TokEOF = *Tokens.back(); + if (TokEOF.NewlinesBefore == 0) { + TokEOF.NewlinesBefore = 1; + TokEOF.OriginalColumn = 0; + } + } return Tokens; } diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index dfa703aed0d3..6f09835bad3a 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -2840,11 +2840,14 @@ private: if (AfterRParen->isOneOf(tok::identifier, tok::kw_this)) return true; - // Look for a cast `( x ) (`. - if (AfterRParen->is(tok::l_paren) && BeforeRParen->Previous) { - if (BeforeRParen->is(tok::identifier) && - BeforeRParen->Previous->is(tok::l_paren)) { - return true; + // Look for a cast `( x ) (`, where x may be a qualified identifier. 
+ if (AfterRParen->is(tok::l_paren)) { + for (const auto *Prev = BeforeRParen; Prev->is(tok::identifier);) { + Prev = Prev->Previous; + if (Prev->is(tok::coloncolon)) + Prev = Prev->Previous; + if (Prev == LParen) + return true; } } @@ -3704,11 +3707,6 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) { auto *First = Line.First; First->SpacesRequiredBefore = 1; First->CanBreakBefore = First->MustBreakBefore; - - if (First->is(tok::eof) && First->NewlinesBefore == 0 && - Style.InsertNewlineAtEOF) { - First->NewlinesBefore = 1; - } } // This function heuristically determines whether 'Current' starts the name of a @@ -4418,31 +4416,29 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, Right.MatchingParen == &Left && Line.Children.empty()) { return Style.SpaceInEmptyBlock; } - if ((Left.is(tok::l_paren) && Right.is(tok::r_paren)) || - (Left.is(tok::l_brace) && Left.isNot(BK_Block) && - Right.is(tok::r_brace) && Right.isNot(BK_Block))) { - return Style.SpacesInParensOptions.InEmptyParentheses; - } - if (Style.SpacesInParens == FormatStyle::SIPO_Custom && - Style.SpacesInParensOptions.ExceptDoubleParentheses && - Left.is(tok::r_paren) && Right.is(tok::r_paren)) { - auto *InnerLParen = Left.MatchingParen; - if (InnerLParen && InnerLParen->Previous == Right.MatchingParen) { - InnerLParen->SpacesRequiredBefore = 0; - return false; + if (Style.SpacesInParens == FormatStyle::SIPO_Custom) { + if ((Left.is(tok::l_paren) && Right.is(tok::r_paren)) || + (Left.is(tok::l_brace) && Left.isNot(BK_Block) && + Right.is(tok::r_brace) && Right.isNot(BK_Block))) { + return Style.SpacesInParensOptions.InEmptyParentheses; + } + if (Style.SpacesInParensOptions.ExceptDoubleParentheses && + Left.is(tok::r_paren) && Right.is(tok::r_paren)) { + auto *InnerLParen = Left.MatchingParen; + if (InnerLParen && InnerLParen->Previous == Right.MatchingParen) { + InnerLParen->SpacesRequiredBefore = 0; + return false; + } } - } - if 
(Style.SpacesInParensOptions.InConditionalStatements) { const FormatToken *LeftParen = nullptr; if (Left.is(tok::l_paren)) LeftParen = &Left; else if (Right.is(tok::r_paren) && Right.MatchingParen) LeftParen = Right.MatchingParen; - if (LeftParen) { - if (LeftParen->is(TT_ConditionLParen)) - return true; - if (LeftParen->Previous && isKeywordWithCondition(*LeftParen->Previous)) - return true; + if (LeftParen && (LeftParen->is(TT_ConditionLParen) || + (LeftParen->Previous && + isKeywordWithCondition(*LeftParen->Previous)))) { + return Style.SpacesInParensOptions.InConditionalStatements; } } diff --git a/clang/lib/Frontend/Rewrite/FixItRewriter.cpp b/clang/lib/Frontend/Rewrite/FixItRewriter.cpp index 44dfaf20eae7..7309553e3bc0 100644 --- a/clang/lib/Frontend/Rewrite/FixItRewriter.cpp +++ b/clang/lib/Frontend/Rewrite/FixItRewriter.cpp @@ -200,10 +200,8 @@ void FixItRewriter::HandleDiagnostic(DiagnosticsEngine::Level DiagLevel, /// Emit a diagnostic via the adapted diagnostic client. void FixItRewriter::Diag(SourceLocation Loc, unsigned DiagID) { // When producing this diagnostic, we temporarily bypass ourselves, - // clear out any current diagnostic, and let the downstream client - // format the diagnostic. + // and let the downstream client format the diagnostic. Diags.setClient(Client, false); - Diags.Clear(); Diags.Report(Loc, DiagID); Diags.setClient(this, false); } diff --git a/clang/lib/Frontend/TextDiagnosticPrinter.cpp b/clang/lib/Frontend/TextDiagnosticPrinter.cpp index c2fea3d03f0c..28f7218dc23f 100644 --- a/clang/lib/Frontend/TextDiagnosticPrinter.cpp +++ b/clang/lib/Frontend/TextDiagnosticPrinter.cpp @@ -84,7 +84,7 @@ static void printDiagnosticOptions(raw_ostream &OS, if (!Opt.empty()) { OS << (Started ? "," : " [") << (Level == DiagnosticsEngine::Remark ? 
"-R" : "-W") << Opt; - StringRef OptValue = Info.getDiags()->getFlagValue(); + StringRef OptValue = Info.getFlagValue(); if (!OptValue.empty()) OS << "=" << OptValue; Started = true; diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 4c75c638b41b..f5cc07c303f9 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -156,6 +156,7 @@ set(x86_files avx10_2_512satcvtintrin.h avx10_2bf16intrin.h avx10_2convertintrin.h + avx10_2copyintrin.h avx10_2minmaxintrin.h avx10_2niintrin.h avx10_2satcvtdsintrin.h diff --git a/clang/lib/Headers/avx10_2copyintrin.h b/clang/lib/Headers/avx10_2copyintrin.h new file mode 100644 index 000000000000..7fc31190781d --- /dev/null +++ b/clang/lib/Headers/avx10_2copyintrin.h @@ -0,0 +1,34 @@ +/*===---- avx10_2copyintrin.h - AVX10.2 Copy intrinsics -------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use <avx10_2copyintrin.h> directly; include <immintrin.h> instead." +#endif // __IMMINTRIN_H + +#ifndef __AVX10_2COPYINTRIN_H +#define __AVX10_2COPYINTRIN_H + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __min_vector_width__(128))) + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi32(__m128i __A) { + return (__m128i)__builtin_shufflevector( + (__v4si)__A, (__v4si)_mm_setzero_si128(), 0, 4, 4, 4); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi16(__m128i __A) { + return (__m128i)__builtin_shufflevector( + (__v8hi)__A, (__v8hi)_mm_setzero_si128(), 0, 8, 8, 8, 8, 8, 8, 8); +} + +#undef __DEFAULT_FN_ATTRS128 + +#endif // __AVX10_2COPYINTRIN_H diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 6a50d50ebd34..6cd6a2caf199 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -651,6 +651,77 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh) float4 cosh(float4); //===----------------------------------------------------------------------===// +// count bits builtins +//===----------------------------------------------------------------------===// + +/// \fn T countbits(T Val) +/// \brief Return the number of bits (per component) set in the input integer. +/// \param Val The input value. 
+ +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +int16_t countbits(int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +int16_t2 countbits(int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +int16_t3 countbits(int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +int16_t4 countbits(int16_t4); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +uint16_t countbits(uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +uint16_t2 countbits(uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +uint16_t3 countbits(uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +uint16_t4 countbits(uint16_t4); +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +int countbits(int); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +int2 countbits(int2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +int3 countbits(int3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +int4 countbits(int4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +uint countbits(uint); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +uint2 countbits(uint2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +uint3 countbits(uint3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +uint4 countbits(uint4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +int64_t countbits(int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +int64_t2 countbits(int64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +int64_t3 countbits(int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +int64_t4 countbits(int64_t4); + 
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +uint64_t countbits(uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +uint64_t2 countbits(uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +uint64_t3 countbits(uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) +uint64_t4 countbits(uint64_t4); + +//===----------------------------------------------------------------------===// // dot product builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h index 280154f3c102..3fbabffa98df 100644 --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -651,6 +651,7 @@ _storebe_i64(void * __P, long long __D) { #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__) #include <avx10_2bf16intrin.h> #include <avx10_2convertintrin.h> +#include <avx10_2copyintrin.h> #include <avx10_2minmaxintrin.h> #include <avx10_2niintrin.h> #include <avx10_2satcvtdsintrin.h> diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 1f56884be392..a04eed9873c0 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -5439,18 +5439,20 @@ void Parser::ParseEnumSpecifier(SourceLocation StartLoc, DeclSpec &DS, BaseRange = SourceRange(ColonLoc, DeclaratorInfo.getSourceRange().getEnd()); - if (!getLangOpts().ObjC && !getLangOpts().C23) { + if (!getLangOpts().ObjC) { if (getLangOpts().CPlusPlus11) Diag(ColonLoc, diag::warn_cxx98_compat_enum_fixed_underlying_type) << BaseRange; else if (getLangOpts().CPlusPlus) Diag(ColonLoc, diag::ext_cxx11_enum_fixed_underlying_type) << BaseRange; - else if (getLangOpts().MicrosoftExt) + else if (getLangOpts().MicrosoftExt && !getLangOpts().C23) Diag(ColonLoc, diag::ext_ms_c_enum_fixed_underlying_type) << BaseRange; else - Diag(ColonLoc, diag::ext_clang_c_enum_fixed_underlying_type) + Diag(ColonLoc, getLangOpts().C23 + ? 
diag::warn_c17_compat_enum_fixed_underlying_type + : diag::ext_c23_enum_fixed_underlying_type) << BaseRange; } } diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 85cbbe7750c2..69d724124718 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -1590,7 +1590,7 @@ LangAS Sema::getDefaultCXXMethodAddrSpace() const { return LangAS::Default; } -void Sema::EmitCurrentDiagnostic(unsigned DiagID) { +void Sema::EmitDiagnostic(unsigned DiagID, const DiagnosticBuilder &DB) { // FIXME: It doesn't make sense to me that DiagID is an incoming argument here // and yet we also use the current diag ID on the DiagnosticsEngine. This has // been made more painfully obvious by the refactor that introduced this @@ -1598,9 +1598,9 @@ void Sema::EmitCurrentDiagnostic(unsigned DiagID) { // eliminated. If it truly cannot be (for example, there is some reentrancy // issue I am not seeing yet), then there should at least be a clarifying // comment somewhere. + Diagnostic DiagInfo(&Diags, DB); if (std::optional<TemplateDeductionInfo *> Info = isSFINAEContext()) { - switch (DiagnosticIDs::getDiagnosticSFINAEResponse( - Diags.getCurrentDiagID())) { + switch (DiagnosticIDs::getDiagnosticSFINAEResponse(DiagInfo.getID())) { case DiagnosticIDs::SFINAE_Report: // We'll report the diagnostic below. break; @@ -1613,13 +1613,11 @@ void Sema::EmitCurrentDiagnostic(unsigned DiagID) { // Make a copy of this suppressed diagnostic and store it with the // template-deduction information. 
if (*Info && !(*Info)->hasSFINAEDiagnostic()) { - Diagnostic DiagInfo(&Diags); (*Info)->addSFINAEDiagnostic(DiagInfo.getLocation(), PartialDiagnostic(DiagInfo, Context.getDiagAllocator())); } Diags.setLastDiagnosticIgnored(true); - Diags.Clear(); return; case DiagnosticIDs::SFINAE_AccessControl: { @@ -1630,7 +1628,7 @@ void Sema::EmitCurrentDiagnostic(unsigned DiagID) { if (!AccessCheckingSFINAE && !getLangOpts().CPlusPlus11) break; - SourceLocation Loc = Diags.getCurrentDiagLoc(); + SourceLocation Loc = DiagInfo.getLocation(); // Suppress this diagnostic. ++NumSFINAEErrors; @@ -1638,16 +1636,13 @@ void Sema::EmitCurrentDiagnostic(unsigned DiagID) { // Make a copy of this suppressed diagnostic and store it with the // template-deduction information. if (*Info && !(*Info)->hasSFINAEDiagnostic()) { - Diagnostic DiagInfo(&Diags); (*Info)->addSFINAEDiagnostic(DiagInfo.getLocation(), PartialDiagnostic(DiagInfo, Context.getDiagAllocator())); } Diags.setLastDiagnosticIgnored(true); - Diags.Clear(); - // Now the diagnostic state is clear, produce a C++98 compatibility - // warning. + // Now produce a C++98 compatibility warning. Diag(Loc, diag::warn_cxx98_compat_sfinae_access_control); // The last diagnostic which Sema produced was ignored. Suppress any @@ -1660,14 +1655,12 @@ void Sema::EmitCurrentDiagnostic(unsigned DiagID) { // Make a copy of this suppressed diagnostic and store it with the // template-deduction information; if (*Info) { - Diagnostic DiagInfo(&Diags); (*Info)->addSuppressedDiagnostic(DiagInfo.getLocation(), PartialDiagnostic(DiagInfo, Context.getDiagAllocator())); } // Suppress this diagnostic. Diags.setLastDiagnosticIgnored(true); - Diags.Clear(); return; } } @@ -1677,7 +1670,7 @@ void Sema::EmitCurrentDiagnostic(unsigned DiagID) { Context.setPrintingPolicy(getPrintingPolicy()); // Emit the diagnostic. 
- if (!Diags.EmitCurrentDiagnostic()) + if (!Diags.EmitDiagnostic(DB)) return; // If this is not a note, and we're in a template instantiation diff --git a/clang/lib/Sema/SemaBase.cpp b/clang/lib/Sema/SemaBase.cpp index a2f12d622e8c..5c24f21b469b 100644 --- a/clang/lib/Sema/SemaBase.cpp +++ b/clang/lib/Sema/SemaBase.cpp @@ -26,7 +26,7 @@ SemaBase::ImmediateDiagBuilder::~ImmediateDiagBuilder() { Clear(); // Dispatch to Sema to emit the diagnostic. - SemaRef.EmitCurrentDiagnostic(DiagID); + SemaRef.EmitDiagnostic(DiagID, *this); } PartialDiagnostic SemaBase::PDiag(unsigned DiagID) { diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index f01b22a72915..6ac620184347 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -446,7 +446,12 @@ static bool tryDiagnoseOverloadedCast(Sema &S, CastType CT, : InitializationKind::CreateCast(/*type range?*/ range); InitializationSequence sequence(S, entity, initKind, src); - assert(sequence.Failed() && "initialization succeeded on second try?"); + // It could happen that a constructor failed to be used because + // it requires a temporary of a broken type. Still, it will be found when + // looking for a match. 
+ if (!sequence.Failed()) + return false; + switch (sequence.getFailureKind()) { default: return false; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 99500daca295..d2570119c343 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2795,7 +2795,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, if (BuiltinElementwiseMath(TheCall)) return ExprError(); break; - + case Builtin::BI__builtin_elementwise_popcount: case Builtin::BI__builtin_elementwise_bitreverse: { if (PrepareBuiltinElementwiseMathOneArgCall(TheCall)) return ExprError(); diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index a574d56646f3..89a0beadc61f 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -849,12 +849,28 @@ static bool isAttributedCoroAwaitElidable(const QualType &QT) { return Record && Record->hasAttr<CoroAwaitElidableAttr>(); } -static bool isCoroAwaitElidableCall(Expr *Operand) { - if (!Operand->isPRValue()) { - return false; - } +static void applySafeElideContext(Expr *Operand) { + auto *Call = dyn_cast<CallExpr>(Operand->IgnoreImplicit()); + if (!Call || !Call->isPRValue()) + return; + + if (!isAttributedCoroAwaitElidable(Call->getType())) + return; + + Call->setCoroElideSafe(); - return isAttributedCoroAwaitElidable(Operand->getType()); + // Check parameter + auto *Fn = llvm::dyn_cast_if_present<FunctionDecl>(Call->getCalleeDecl()); + if (!Fn) + return; + + size_t ParmIdx = 0; + for (ParmVarDecl *PD : Fn->parameters()) { + if (PD->hasAttr<CoroAwaitElidableArgumentAttr>()) + applySafeElideContext(Call->getArg(ParmIdx)); + + ParmIdx++; + } } // Attempts to resolve and build a CoawaitExpr from "raw" inputs, bailing out to @@ -880,14 +896,12 @@ ExprResult Sema::BuildUnresolvedCoawaitExpr(SourceLocation Loc, Expr *Operand, } auto *RD = Promise->getType()->getAsCXXRecordDecl(); - bool AwaitElidable = - 
isCoroAwaitElidableCall(Operand) && - isAttributedCoroAwaitElidable( - getCurFunctionDecl(/*AllowLambda=*/true)->getReturnType()); - - if (AwaitElidable) - if (auto *Call = dyn_cast<CallExpr>(Operand->IgnoreImplicit())) - Call->setCoroElideSafe(); + + bool CurFnAwaitElidable = isAttributedCoroAwaitElidable( + getCurFunctionDecl(/*AllowLambda=*/true)->getReturnType()); + + if (CurFnAwaitElidable) + applySafeElideContext(Operand); Expr *Transformed = Operand; if (lookupMember(*this, "await_transform", RD, Loc)) { diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 8557c25b93a8..31bf50a32a83 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -9768,7 +9768,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, bool ImplicitInlineCXX20 = !getLangOpts().CPlusPlusModules || NewFD->isConstexpr() || NewFD->isConsteval() || !NewFD->getOwningModule() || - NewFD->isFromExplicitGlobalModule() || + NewFD->isFromGlobalModule() || NewFD->getOwningModule()->isHeaderLikeModule(); bool isInline = D.getDeclSpec().isInlineSpecified(); bool isVirtual = D.getDeclSpec().isVirtualSpecified(); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 80c252c79e4d..2f7e9c754ce0 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -17557,7 +17557,7 @@ static void RemoveNestedImmediateInvocation( else break; } - /// ConstantExpr are the first layer of implicit node to be removed so if + /// ConstantExprs are the first layer of implicit node to be removed so if /// Init isn't a ConstantExpr, no ConstantExpr will be skipped. if (auto *CE = dyn_cast<ConstantExpr>(Init); CE && CE->isImmediateInvocation()) @@ -17570,7 +17570,7 @@ static void RemoveNestedImmediateInvocation( } ExprResult TransformLambdaExpr(LambdaExpr *E) { // Do not rebuild lambdas to avoid creating a new type. - // Lambdas have already been processed inside their eval context. 
+ // Lambdas have already been processed inside their eval contexts. return E; } bool AlwaysRebuild() { return false; } diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 67792be994fa..03b7c2edb605 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -40,6 +40,48 @@ #include <utility> using namespace clang; +using llvm::dxil::ResourceClass; + +enum class RegisterType { SRV, UAV, CBuffer, Sampler, C, I, Invalid }; + +static RegisterType getRegisterType(ResourceClass RC) { + switch (RC) { + case ResourceClass::SRV: + return RegisterType::SRV; + case ResourceClass::UAV: + return RegisterType::UAV; + case ResourceClass::CBuffer: + return RegisterType::CBuffer; + case ResourceClass::Sampler: + return RegisterType::Sampler; + } + llvm_unreachable("unexpected ResourceClass value"); +} + +static RegisterType getRegisterType(StringRef Slot) { + switch (Slot[0]) { + case 't': + case 'T': + return RegisterType::SRV; + case 'u': + case 'U': + return RegisterType::UAV; + case 'b': + case 'B': + return RegisterType::CBuffer; + case 's': + case 'S': + return RegisterType::Sampler; + case 'c': + case 'C': + return RegisterType::C; + case 'i': + case 'I': + return RegisterType::I; + default: + return RegisterType::Invalid; + } +} SemaHLSL::SemaHLSL(Sema &S) : SemaBase(S) {} @@ -586,8 +628,7 @@ bool clang::CreateHLSLAttributedResourceType( LocEnd = A->getRange().getEnd(); switch (A->getKind()) { case attr::HLSLResourceClass: { - llvm::dxil::ResourceClass RC = - cast<HLSLResourceClassAttr>(A)->getResourceClass(); + ResourceClass RC = cast<HLSLResourceClassAttr>(A)->getResourceClass(); if (HasResourceClass) { S.Diag(A->getLocation(), ResAttrs.ResourceClass == RC ? 
diag::warn_duplicate_attribute_exact @@ -672,7 +713,7 @@ bool SemaHLSL::handleResourceTypeAttr(const ParsedAttr &AL) { SourceLocation ArgLoc = Loc->Loc; // Validate resource class value - llvm::dxil::ResourceClass RC; + ResourceClass RC; if (!HLSLResourceClassAttr::ConvertStrToResourceClass(Identifier, RC)) { Diag(ArgLoc, diag::warn_attribute_type_not_supported) << "ResourceClass" << Identifier; @@ -750,28 +791,6 @@ SemaHLSL::TakeLocForHLSLAttribute(const HLSLAttributedResourceType *RT) { return LocInfo; } -struct RegisterBindingFlags { - bool Resource = false; - bool UDT = false; - bool Other = false; - bool Basic = false; - - bool SRV = false; - bool UAV = false; - bool CBV = false; - bool Sampler = false; - - bool ContainsNumeric = false; - bool DefaultGlobals = false; - - // used only when Resource == true - std::optional<llvm::dxil::ResourceClass> ResourceClass; -}; - -static bool isDeclaredWithinCOrTBuffer(const Decl *TheDecl) { - return TheDecl && isa<HLSLBufferDecl>(TheDecl->getDeclContext()); -} - // get the record decl from a var decl that we expect // represents a resource static CXXRecordDecl *getRecordDeclFromVarDecl(VarDecl *VD) { @@ -786,24 +805,6 @@ static CXXRecordDecl *getRecordDeclFromVarDecl(VarDecl *VD) { return TheRecordDecl; } -static void updateResourceClassFlagsFromDeclResourceClass( - RegisterBindingFlags &Flags, llvm::hlsl::ResourceClass DeclResourceClass) { - switch (DeclResourceClass) { - case llvm::hlsl::ResourceClass::SRV: - Flags.SRV = true; - break; - case llvm::hlsl::ResourceClass::UAV: - Flags.UAV = true; - break; - case llvm::hlsl::ResourceClass::CBuffer: - Flags.CBV = true; - break; - case llvm::hlsl::ResourceClass::Sampler: - Flags.Sampler = true; - break; - } -} - const HLSLAttributedResourceType * findAttributedResourceTypeOnField(VarDecl *VD) { assert(VD != nullptr && "expected VarDecl"); @@ -817,8 +818,10 @@ findAttributedResourceTypeOnField(VarDecl *VD) { return nullptr; } -static void 
updateResourceClassFlagsFromRecordType(RegisterBindingFlags &Flags, - const RecordType *RT) { +// Iterate over RecordType fields and return true if any of them matched the +// register type +static bool ContainsResourceForRegisterType(Sema &S, const RecordType *RT, + RegisterType RegType) { llvm::SmallVector<const Type *> TypesToScan; TypesToScan.emplace_back(RT); @@ -827,8 +830,8 @@ static void updateResourceClassFlagsFromRecordType(RegisterBindingFlags &Flags, while (T->isArrayType()) T = T->getArrayElementTypeNoTypeQual(); if (T->isIntegralOrEnumerationType() || T->isFloatingType()) { - Flags.ContainsNumeric = true; - continue; + if (RegType == RegisterType::C) + return true; } const RecordType *RT = T->getAs<RecordType>(); if (!RT) @@ -839,100 +842,84 @@ static void updateResourceClassFlagsFromRecordType(RegisterBindingFlags &Flags, const Type *FieldTy = FD->getType().getTypePtr(); if (const HLSLAttributedResourceType *AttrResType = dyn_cast<HLSLAttributedResourceType>(FieldTy)) { - updateResourceClassFlagsFromDeclResourceClass( - Flags, AttrResType->getAttrs().ResourceClass); - continue; + ResourceClass RC = AttrResType->getAttrs().ResourceClass; + if (getRegisterType(RC) == RegType) + return true; + } else { + TypesToScan.emplace_back(FD->getType().getTypePtr()); } - TypesToScan.emplace_back(FD->getType().getTypePtr()); } } + return false; } -static RegisterBindingFlags HLSLFillRegisterBindingFlags(Sema &S, - Decl *TheDecl) { - RegisterBindingFlags Flags; +static void CheckContainsResourceForRegisterType(Sema &S, + SourceLocation &ArgLoc, + Decl *D, RegisterType RegType, + bool SpecifiedSpace) { + int RegTypeNum = static_cast<int>(RegType); // check if the decl type is groupshared - if (TheDecl->hasAttr<HLSLGroupSharedAddressSpaceAttr>()) { - Flags.Other = true; - return Flags; + if (D->hasAttr<HLSLGroupSharedAddressSpaceAttr>()) { + S.Diag(ArgLoc, diag::err_hlsl_binding_type_mismatch) << RegTypeNum; + return; } // Cbuffers and Tbuffers are HLSLBufferDecl 
types - if (HLSLBufferDecl *CBufferOrTBuffer = dyn_cast<HLSLBufferDecl>(TheDecl)) { - Flags.Resource = true; - Flags.ResourceClass = CBufferOrTBuffer->isCBuffer() - ? llvm::dxil::ResourceClass::CBuffer - : llvm::dxil::ResourceClass::SRV; + if (HLSLBufferDecl *CBufferOrTBuffer = dyn_cast<HLSLBufferDecl>(D)) { + ResourceClass RC = CBufferOrTBuffer->isCBuffer() ? ResourceClass::CBuffer + : ResourceClass::SRV; + if (RegType != getRegisterType(RC)) + S.Diag(D->getLocation(), diag::err_hlsl_binding_type_mismatch) + << RegTypeNum; + return; } + // Samplers, UAVs, and SRVs are VarDecl types - else if (VarDecl *TheVarDecl = dyn_cast<VarDecl>(TheDecl)) { - if (const HLSLAttributedResourceType *AttrResType = - findAttributedResourceTypeOnField(TheVarDecl)) { - Flags.Resource = true; - Flags.ResourceClass = AttrResType->getAttrs().ResourceClass; - } else { - const clang::Type *TheBaseType = TheVarDecl->getType().getTypePtr(); - while (TheBaseType->isArrayType()) - TheBaseType = TheBaseType->getArrayElementTypeNoTypeQual(); - - if (TheBaseType->isArithmeticType()) { - Flags.Basic = true; - if (!isDeclaredWithinCOrTBuffer(TheDecl) && - (TheBaseType->isIntegralType(S.getASTContext()) || - TheBaseType->isFloatingType())) - Flags.DefaultGlobals = true; - } else if (TheBaseType->isRecordType()) { - Flags.UDT = true; - const RecordType *TheRecordTy = TheBaseType->getAs<RecordType>(); - updateResourceClassFlagsFromRecordType(Flags, TheRecordTy); - } else - Flags.Other = true; - } - } else { - llvm_unreachable("expected be VarDecl or HLSLBufferDecl"); + assert(isa<VarDecl>(D) && "D is expected to be VarDecl or HLSLBufferDecl"); + VarDecl *VD = cast<VarDecl>(D); + + // Resource + if (const HLSLAttributedResourceType *AttrResType = + findAttributedResourceTypeOnField(VD)) { + if (RegType != getRegisterType(AttrResType->getAttrs().ResourceClass)) + S.Diag(D->getLocation(), diag::err_hlsl_binding_type_mismatch) + << RegTypeNum; + return; } - return Flags; -} -enum class RegisterType { SRV, 
UAV, CBuffer, Sampler, C, I, Invalid }; + const clang::Type *Ty = VD->getType().getTypePtr(); + while (Ty->isArrayType()) + Ty = Ty->getArrayElementTypeNoTypeQual(); -static RegisterType getRegisterType(llvm::dxil::ResourceClass RC) { - switch (RC) { - case llvm::dxil::ResourceClass::SRV: - return RegisterType::SRV; - case llvm::dxil::ResourceClass::UAV: - return RegisterType::UAV; - case llvm::dxil::ResourceClass::CBuffer: - return RegisterType::CBuffer; - case llvm::dxil::ResourceClass::Sampler: - return RegisterType::Sampler; - } - llvm_unreachable("unexpected ResourceClass value"); -} + // Basic types + if (Ty->isArithmeticType()) { + bool DeclaredInCOrTBuffer = isa<HLSLBufferDecl>(D->getDeclContext()); + if (SpecifiedSpace && !DeclaredInCOrTBuffer) + S.Diag(ArgLoc, diag::err_hlsl_space_on_global_constant); -static RegisterType getRegisterType(StringRef Slot) { - switch (Slot[0]) { - case 't': - case 'T': - return RegisterType::SRV; - case 'u': - case 'U': - return RegisterType::UAV; - case 'b': - case 'B': - return RegisterType::CBuffer; - case 's': - case 'S': - return RegisterType::Sampler; - case 'c': - case 'C': - return RegisterType::C; - case 'i': - case 'I': - return RegisterType::I; - default: - return RegisterType::Invalid; + if (!DeclaredInCOrTBuffer && + (Ty->isIntegralType(S.getASTContext()) || Ty->isFloatingType())) { + // Default Globals + if (RegType == RegisterType::CBuffer) + S.Diag(ArgLoc, diag::warn_hlsl_deprecated_register_type_b); + else if (RegType != RegisterType::C) + S.Diag(ArgLoc, diag::err_hlsl_binding_type_mismatch) << RegTypeNum; + } else { + if (RegType == RegisterType::C) + S.Diag(ArgLoc, diag::warn_hlsl_register_type_c_packoffset); + else + S.Diag(ArgLoc, diag::err_hlsl_binding_type_mismatch) << RegTypeNum; + } + } else if (Ty->isRecordType()) { + // Class/struct types - walk the declaration and check each field and + // subclass + if (!ContainsResourceForRegisterType(S, Ty->getAs<RecordType>(), RegType)) + 
S.Diag(D->getLocation(), diag::warn_hlsl_user_defined_type_missing_member) + << RegTypeNum; + } else { + // Anything else is an error + S.Diag(ArgLoc, diag::err_hlsl_binding_type_mismatch) << RegTypeNum; } } @@ -969,73 +956,19 @@ static void ValidateMultipleRegisterAnnotations(Sema &S, Decl *TheDecl, } static void DiagnoseHLSLRegisterAttribute(Sema &S, SourceLocation &ArgLoc, - Decl *TheDecl, RegisterType regType) { + Decl *D, RegisterType RegType, + bool SpecifiedSpace) { // exactly one of these two types should be set - assert(((isa<VarDecl>(TheDecl) && !isa<HLSLBufferDecl>(TheDecl)) || - (!isa<VarDecl>(TheDecl) && isa<HLSLBufferDecl>(TheDecl))) && + assert(((isa<VarDecl>(D) && !isa<HLSLBufferDecl>(D)) || + (!isa<VarDecl>(D) && isa<HLSLBufferDecl>(D))) && "expecting VarDecl or HLSLBufferDecl"); - RegisterBindingFlags Flags = HLSLFillRegisterBindingFlags(S, TheDecl); - assert((int)Flags.Other + (int)Flags.Resource + (int)Flags.Basic + - (int)Flags.UDT == - 1 && - "only one resource analysis result should be expected"); - - int regTypeNum = static_cast<int>(regType); - - // first, if "other" is set, emit an error - if (Flags.Other) { - S.Diag(ArgLoc, diag::err_hlsl_binding_type_mismatch) << regTypeNum; - return; - } + // check if the declaration contains resource matching the register type + CheckContainsResourceForRegisterType(S, ArgLoc, D, RegType, SpecifiedSpace); // next, if multiple register annotations exist, check that none conflict. - ValidateMultipleRegisterAnnotations(S, TheDecl, regType); - - // next, if resource is set, make sure the register type in the register - // annotation is compatible with the variable's resource type. 
- if (Flags.Resource) { - RegisterType expRegType = getRegisterType(Flags.ResourceClass.value()); - if (regType != expRegType) { - S.Diag(TheDecl->getLocation(), diag::err_hlsl_binding_type_mismatch) - << regTypeNum; - } - return; - } - - // next, handle diagnostics for when the "basic" flag is set - if (Flags.Basic) { - if (Flags.DefaultGlobals) { - if (regType == RegisterType::CBuffer) - S.Diag(ArgLoc, diag::warn_hlsl_deprecated_register_type_b); - else if (regType != RegisterType::C) - S.Diag(ArgLoc, diag::err_hlsl_binding_type_mismatch) << regTypeNum; - return; - } - - if (regType == RegisterType::C) - S.Diag(ArgLoc, diag::warn_hlsl_register_type_c_packoffset); - else - S.Diag(ArgLoc, diag::err_hlsl_binding_type_mismatch) << regTypeNum; - - return; - } - - // finally, we handle the udt case - if (Flags.UDT) { - const bool ExpectedRegisterTypesForUDT[] = { - Flags.SRV, Flags.UAV, Flags.CBV, Flags.Sampler, Flags.ContainsNumeric}; - assert((size_t)regTypeNum < std::size(ExpectedRegisterTypesForUDT) && - "regType has unexpected value"); - - if (!ExpectedRegisterTypesForUDT[regTypeNum]) - S.Diag(TheDecl->getLocation(), - diag::warn_hlsl_user_defined_type_missing_member) - << regTypeNum; - - return; - } + ValidateMultipleRegisterAnnotations(S, D, RegType); } void SemaHLSL::handleResourceBindingAttr(Decl *TheDecl, const ParsedAttr &AL) { @@ -1059,7 +992,9 @@ void SemaHLSL::handleResourceBindingAttr(Decl *TheDecl, const ParsedAttr &AL) { SourceLocation ArgLoc = Loc->Loc; SourceLocation SpaceArgLoc; + bool SpecifiedSpace = false; if (AL.getNumArgs() == 2) { + SpecifiedSpace = true; Slot = Str; if (!AL.isArgIdent(1)) { Diag(AL.getLoc(), diag::err_attribute_argument_type) @@ -1107,7 +1042,8 @@ void SemaHLSL::handleResourceBindingAttr(Decl *TheDecl, const ParsedAttr &AL) { return; } - DiagnoseHLSLRegisterAttribute(SemaRef, ArgLoc, TheDecl, regType); + DiagnoseHLSLRegisterAttribute(SemaRef, ArgLoc, TheDecl, regType, + SpecifiedSpace); HLSLResourceBindingAttr *NewAttr = 
HLSLResourceBindingAttr::Create(getASTContext(), Slot, Space, AL); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index b952ffbd69f5..9afb8cea26fe 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -2861,113 +2861,120 @@ void SemaOpenMP::EndOpenMPDSABlock(Stmt *CurDirective) { // clause requires an accessible, unambiguous default constructor for the // class type, unless the list item is also specified in a firstprivate // clause. - if (const auto *D = dyn_cast_or_null<OMPExecutableDirective>(CurDirective)) { - for (OMPClause *C : D->clauses()) { - if (auto *Clause = dyn_cast<OMPLastprivateClause>(C)) { - SmallVector<Expr *, 8> PrivateCopies; - for (Expr *DE : Clause->varlist()) { - if (DE->isValueDependent() || DE->isTypeDependent()) { - PrivateCopies.push_back(nullptr); - continue; - } - auto *DRE = cast<DeclRefExpr>(DE->IgnoreParens()); - auto *VD = cast<VarDecl>(DRE->getDecl()); - QualType Type = VD->getType().getNonReferenceType(); - const DSAStackTy::DSAVarData DVar = - DSAStack->getTopDSA(VD, /*FromParent=*/false); - if (DVar.CKind == OMPC_lastprivate) { - // Generate helper private variable and initialize it with the - // default value. The address of the original variable is replaced - // by the address of the new private variable in CodeGen. This new - // variable is not added to IdResolver, so the code in the OpenMP - // region uses original variable for proper diagnostics. - VarDecl *VDPrivate = buildVarDecl( - SemaRef, DE->getExprLoc(), Type.getUnqualifiedType(), - VD->getName(), VD->hasAttrs() ? &VD->getAttrs() : nullptr, DRE); - SemaRef.ActOnUninitializedDecl(VDPrivate); - if (VDPrivate->isInvalidDecl()) { - PrivateCopies.push_back(nullptr); - continue; - } - PrivateCopies.push_back(buildDeclRefExpr( - SemaRef, VDPrivate, DE->getType(), DE->getExprLoc())); - } else { - // The variable is also a firstprivate, so initialization sequence - // for private copy is generated already. 
- PrivateCopies.push_back(nullptr); - } - } - Clause->setPrivateCopies(PrivateCopies); + + auto FinalizeLastprivate = [&](OMPLastprivateClause *Clause) { + SmallVector<Expr *, 8> PrivateCopies; + for (Expr *DE : Clause->varlist()) { + if (DE->isValueDependent() || DE->isTypeDependent()) { + PrivateCopies.push_back(nullptr); continue; } - // Finalize nontemporal clause by handling private copies, if any. - if (auto *Clause = dyn_cast<OMPNontemporalClause>(C)) { - SmallVector<Expr *, 8> PrivateRefs; - for (Expr *RefExpr : Clause->varlist()) { - assert(RefExpr && "NULL expr in OpenMP nontemporal clause."); - SourceLocation ELoc; - SourceRange ERange; - Expr *SimpleRefExpr = RefExpr; - auto Res = getPrivateItem(SemaRef, SimpleRefExpr, ELoc, ERange); - if (Res.second) - // It will be analyzed later. - PrivateRefs.push_back(RefExpr); - ValueDecl *D = Res.first; - if (!D) - continue; - - const DSAStackTy::DSAVarData DVar = - DSAStack->getTopDSA(D, /*FromParent=*/false); - PrivateRefs.push_back(DVar.PrivateCopy ? DVar.PrivateCopy - : SimpleRefExpr); - } - Clause->setPrivateRefs(PrivateRefs); + auto *DRE = cast<DeclRefExpr>(DE->IgnoreParens()); + auto *VD = cast<VarDecl>(DRE->getDecl()); + QualType Type = VD->getType().getNonReferenceType(); + const DSAStackTy::DSAVarData DVar = + DSAStack->getTopDSA(VD, /*FromParent=*/false); + if (DVar.CKind != OMPC_lastprivate) { + // The variable is also a firstprivate, so initialization sequence + // for private copy is generated already. 
+ PrivateCopies.push_back(nullptr); continue; } - if (auto *Clause = dyn_cast<OMPUsesAllocatorsClause>(C)) { - for (unsigned I = 0, E = Clause->getNumberOfAllocators(); I < E; ++I) { - OMPUsesAllocatorsClause::Data D = Clause->getAllocatorData(I); - auto *DRE = dyn_cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()); - if (!DRE) - continue; - ValueDecl *VD = DRE->getDecl(); - if (!VD || !isa<VarDecl>(VD)) - continue; - DSAStackTy::DSAVarData DVar = - DSAStack->getTopDSA(VD, /*FromParent=*/false); - // OpenMP [2.12.5, target Construct] - // Memory allocators that appear in a uses_allocators clause cannot - // appear in other data-sharing attribute clauses or data-mapping - // attribute clauses in the same construct. - Expr *MapExpr = nullptr; - if (DVar.RefExpr || - DSAStack->checkMappableExprComponentListsForDecl( - VD, /*CurrentRegionOnly=*/true, - [VD, &MapExpr]( - OMPClauseMappableExprCommon::MappableExprComponentListRef - MapExprComponents, - OpenMPClauseKind C) { - auto MI = MapExprComponents.rbegin(); - auto ME = MapExprComponents.rend(); - if (MI != ME && - MI->getAssociatedDeclaration()->getCanonicalDecl() == - VD->getCanonicalDecl()) { - MapExpr = MI->getAssociatedExpression(); - return true; - } - return false; - })) { - Diag(D.Allocator->getExprLoc(), - diag::err_omp_allocator_used_in_clauses) - << D.Allocator->getSourceRange(); - if (DVar.RefExpr) - reportOriginalDsa(SemaRef, DSAStack, VD, DVar); - else - Diag(MapExpr->getExprLoc(), diag::note_used_here) - << MapExpr->getSourceRange(); - } - } + // Generate helper private variable and initialize it with the + // default value. The address of the original variable is replaced + // by the address of the new private variable in CodeGen. This new + // variable is not added to IdResolver, so the code in the OpenMP + // region uses original variable for proper diagnostics. + VarDecl *VDPrivate = buildVarDecl( + SemaRef, DE->getExprLoc(), Type.getUnqualifiedType(), VD->getName(), + VD->hasAttrs() ? 
&VD->getAttrs() : nullptr, DRE); + SemaRef.ActOnUninitializedDecl(VDPrivate); + if (VDPrivate->isInvalidDecl()) { + PrivateCopies.push_back(nullptr); + continue; + } + PrivateCopies.push_back(buildDeclRefExpr( + SemaRef, VDPrivate, DE->getType(), DE->getExprLoc())); + } + Clause->setPrivateCopies(PrivateCopies); + }; + + auto FinalizeNontemporal = [&](OMPNontemporalClause *Clause) { + // Finalize nontemporal clause by handling private copies, if any. + SmallVector<Expr *, 8> PrivateRefs; + for (Expr *RefExpr : Clause->varlist()) { + assert(RefExpr && "NULL expr in OpenMP nontemporal clause."); + SourceLocation ELoc; + SourceRange ERange; + Expr *SimpleRefExpr = RefExpr; + auto Res = getPrivateItem(SemaRef, SimpleRefExpr, ELoc, ERange); + if (Res.second) + // It will be analyzed later. + PrivateRefs.push_back(RefExpr); + ValueDecl *D = Res.first; + if (!D) continue; + + const DSAStackTy::DSAVarData DVar = + DSAStack->getTopDSA(D, /*FromParent=*/false); + PrivateRefs.push_back(DVar.PrivateCopy ? DVar.PrivateCopy + : SimpleRefExpr); + } + Clause->setPrivateRefs(PrivateRefs); + }; + + auto FinalizeAllocators = [&](OMPUsesAllocatorsClause *Clause) { + for (unsigned I = 0, E = Clause->getNumberOfAllocators(); I < E; ++I) { + OMPUsesAllocatorsClause::Data D = Clause->getAllocatorData(I); + auto *DRE = dyn_cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()); + if (!DRE) + continue; + ValueDecl *VD = DRE->getDecl(); + if (!VD || !isa<VarDecl>(VD)) + continue; + DSAStackTy::DSAVarData DVar = + DSAStack->getTopDSA(VD, /*FromParent=*/false); + // OpenMP [2.12.5, target Construct] + // Memory allocators that appear in a uses_allocators clause cannot + // appear in other data-sharing attribute clauses or data-mapping + // attribute clauses in the same construct. 
+ Expr *MapExpr = nullptr; + if (DVar.RefExpr || + DSAStack->checkMappableExprComponentListsForDecl( + VD, /*CurrentRegionOnly=*/true, + [VD, &MapExpr]( + OMPClauseMappableExprCommon::MappableExprComponentListRef + MapExprComponents, + OpenMPClauseKind C) { + auto MI = MapExprComponents.rbegin(); + auto ME = MapExprComponents.rend(); + if (MI != ME && + MI->getAssociatedDeclaration()->getCanonicalDecl() == + VD->getCanonicalDecl()) { + MapExpr = MI->getAssociatedExpression(); + return true; + } + return false; + })) { + Diag(D.Allocator->getExprLoc(), diag::err_omp_allocator_used_in_clauses) + << D.Allocator->getSourceRange(); + if (DVar.RefExpr) + reportOriginalDsa(SemaRef, DSAStack, VD, DVar); + else + Diag(MapExpr->getExprLoc(), diag::note_used_here) + << MapExpr->getSourceRange(); + } + } + }; + + if (const auto *D = dyn_cast_or_null<OMPExecutableDirective>(CurDirective)) { + for (OMPClause *C : D->clauses()) { + if (auto *Clause = dyn_cast<OMPLastprivateClause>(C)) { + FinalizeLastprivate(Clause); + } else if (auto *Clause = dyn_cast<OMPNontemporalClause>(C)) { + FinalizeNontemporal(Clause); + } else if (auto *Clause = dyn_cast<OMPUsesAllocatorsClause>(C)) { + FinalizeAllocators(Clause); } } // Check allocate clauses. diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index e5ea02a919f4..b052afede2cd 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -5508,50 +5508,31 @@ bool Sema::CheckTemplateArgumentList( } // Check whether we have a default argument. - TemplateArgumentLoc Arg; + bool HasDefaultArg; // Retrieve the default template argument from the template // parameter. For each kind of template parameter, we substitute the // template arguments provided thus far and any "outer" template arguments // (when the template parameter was part of a nested template) into // the default argument. 
- if (TemplateTypeParmDecl *TTP = dyn_cast<TemplateTypeParmDecl>(*Param)) { - if (!hasReachableDefaultArgument(TTP)) - return diagnoseMissingArgument(*this, TemplateLoc, Template, TTP, + TemplateArgumentLoc Arg = SubstDefaultTemplateArgumentIfAvailable( + Template, TemplateLoc, RAngleLoc, *Param, SugaredConverted, + CanonicalConverted, HasDefaultArg); + + if (Arg.getArgument().isNull()) { + if (!HasDefaultArg) { + if (TemplateTypeParmDecl *TTP = dyn_cast<TemplateTypeParmDecl>(*Param)) + return diagnoseMissingArgument(*this, TemplateLoc, Template, TTP, + NewArgs); + if (NonTypeTemplateParmDecl *NTTP = + dyn_cast<NonTypeTemplateParmDecl>(*Param)) + return diagnoseMissingArgument(*this, TemplateLoc, Template, NTTP, + NewArgs); + return diagnoseMissingArgument(*this, TemplateLoc, Template, + cast<TemplateTemplateParmDecl>(*Param), NewArgs); - - if (SubstDefaultTemplateArgument(*this, Template, TemplateLoc, RAngleLoc, - TTP, SugaredConverted, - CanonicalConverted, Arg)) - return true; - } else if (NonTypeTemplateParmDecl *NTTP - = dyn_cast<NonTypeTemplateParmDecl>(*Param)) { - if (!hasReachableDefaultArgument(NTTP)) - return diagnoseMissingArgument(*this, TemplateLoc, Template, NTTP, - NewArgs); - - if (SubstDefaultTemplateArgument(*this, Template, TemplateLoc, RAngleLoc, - NTTP, SugaredConverted, - CanonicalConverted, Arg)) - return true; - } else { - TemplateTemplateParmDecl *TempParm - = cast<TemplateTemplateParmDecl>(*Param); - - if (!hasReachableDefaultArgument(TempParm)) - return diagnoseMissingArgument(*this, TemplateLoc, Template, TempParm, - NewArgs); - - NestedNameSpecifierLoc QualifierLoc; - TemplateName Name = SubstDefaultTemplateArgument( - *this, Template, TemplateLoc, RAngleLoc, TempParm, SugaredConverted, - CanonicalConverted, QualifierLoc); - if (Name.isNull()) - return true; - - Arg = TemplateArgumentLoc( - Context, TemplateArgument(Name), QualifierLoc, - TempParm->getDefaultArgument().getTemplateNameLoc()); + } + return true; } // Introduce an 
instantiation record that describes where we are using diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index b50648d5752c..7d83b86a0073 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -5505,8 +5505,11 @@ static TemplateDeductionResult CheckDeductionConsistency( Sema::ArgumentPackSubstitutionIndexRAII PackIndex( S, ArgIdx != -1 ? ::getPackIndexForParam(S, FTD, MLTAL, ArgIdx) : -1); bool IsIncompleteSubstitution = false; - QualType InstP = S.SubstType(P, MLTAL, FTD->getLocation(), FTD->getDeclName(), - &IsIncompleteSubstitution); + // FIXME: A substitution can be incomplete on a non-structural part of the + // type. Use the canonical type for now, until the TemplateInstantiator can + // deal with that. + QualType InstP = S.SubstType(P.getCanonicalType(), MLTAL, FTD->getLocation(), + FTD->getDeclName(), &IsIncompleteSubstitution); if (InstP.isNull() && !IsIncompleteSubstitution) return TemplateDeductionResult::SubstitutionFailure; if (!CheckConsistency) diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index c42cc250bb90..55f38743e276 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1673,6 +1673,10 @@ namespace { } ExprResult TransformLambdaExpr(LambdaExpr *E) { + // Do not rebuild lambdas to avoid creating a new type. + // Lambdas have already been processed inside their eval contexts. 
+ if (SemaRef.RebuildingImmediateInvocation) + return E; LocalInstantiationScope Scope(SemaRef, /*CombineWithOuterScope=*/true); Sema::ConstraintEvalRAII<TemplateInstantiator> RAII(*this); diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index e97a7d768b93..e055c87e7838 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -6294,9 +6294,13 @@ NamedDecl *Sema::FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D, if (!SubstRecord) { // T can be a dependent TemplateSpecializationType when performing a - // substitution for building a deduction guide. - assert(CodeSynthesisContexts.back().Kind == - CodeSynthesisContext::BuildingDeductionGuides); + // substitution for building a deduction guide or for template + // argument deduction in the process of rebuilding immediate + // expressions. (Because the default argument that involves a lambda + // is untransformed and thus could be dependent at this point.) + assert(SemaRef.RebuildingImmediateInvocation || + CodeSynthesisContexts.back().Kind == + CodeSynthesisContext::BuildingDeductionGuides); // Return a nullptr as a sentinel value, we handle it properly in // the TemplateInstantiator::TransformInjectedClassNameType // override, which we transform it to a TemplateSpecializationType. 
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 4fae6ff02ea9..7efcc81e194d 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1382,7 +1382,7 @@ bool ASTReader::ReadVisibleDeclContextStorage(ModuleFile &M, void ASTReader::Error(StringRef Msg) const { Error(diag::err_fe_pch_malformed, Msg); - if (PP.getLangOpts().Modules && !Diags.isDiagnosticInFlight() && + if (PP.getLangOpts().Modules && !PP.getHeaderSearchInfo().getModuleCachePath().empty()) { Diag(diag::note_module_cache_path) << PP.getHeaderSearchInfo().getModuleCachePath(); @@ -1391,10 +1391,7 @@ void ASTReader::Error(StringRef Msg) const { void ASTReader::Error(unsigned DiagID, StringRef Arg1, StringRef Arg2, StringRef Arg3) const { - if (Diags.isDiagnosticInFlight()) - Diags.SetDelayedDiagnostic(DiagID, Arg1, Arg2, Arg3); - else - Diag(DiagID) << Arg1 << Arg2 << Arg3; + Diag(DiagID) << Arg1 << Arg2 << Arg3; } void ASTReader::Error(llvm::Error &&Err) const { @@ -2713,7 +2710,7 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) { // For an overridden file, there is nothing to validate. if (!Overridden && FileChange.Kind != Change::None) { - if (Complain && !Diags.isDiagnosticInFlight()) { + if (Complain) { // Build a list of the PCH imports that got us here (in reverse). 
SmallVector<ModuleFile *, 4> ImportStack(1, &F); while (!ImportStack.back()->ImportedBy.empty()) @@ -3689,10 +3686,8 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, SourceMgr.AllocateLoadedSLocEntries(F.LocalNumSLocEntries, SLocSpaceSize); if (!F.SLocEntryBaseID) { - if (!Diags.isDiagnosticInFlight()) { - Diags.Report(SourceLocation(), diag::remark_sloc_usage); - SourceMgr.noteSLocAddressSpaceUsage(Diags); - } + Diags.Report(SourceLocation(), diag::remark_sloc_usage); + SourceMgr.noteSLocAddressSpaceUsage(Diags); return llvm::createStringError(std::errc::invalid_argument, "ran out of source locations"); } diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp index 8bb7880a3cc2..0a823a1126ce 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp @@ -1835,6 +1835,46 @@ StreamChecker::ensureStreamNonNull(SVal StreamVal, const Expr *StreamE, return StateNotNull; } +namespace { +class StreamClosedVisitor final : public BugReporterVisitor { + const SymbolRef StreamSym; + bool Satisfied = false; + +public: + explicit StreamClosedVisitor(SymbolRef StreamSym) : StreamSym(StreamSym) {} + + static void *getTag() { + static int Tag = 0; + return &Tag; + } + + void Profile(llvm::FoldingSetNodeID &ID) const override { + ID.AddPointer(getTag()); + ID.AddPointer(StreamSym); + } + + PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) override { + if (Satisfied) + return nullptr; + const StreamState *PredSS = + N->getFirstPred()->getState()->get<StreamMap>(StreamSym); + if (PredSS && PredSS->isClosed()) + return nullptr; + + const Stmt *S = N->getStmtForDiagnostics(); + if (!S) + return nullptr; + Satisfied = true; + PathDiagnosticLocation Pos(S, BRC.getSourceManager(), + N->getLocationContext()); + llvm::StringLiteral Msg = "Stream is closed here"; + return 
std::make_shared<PathDiagnosticEventPiece>(Pos, Msg); + } +}; +} // namespace + ProgramStateRef StreamChecker::ensureStreamOpened(SVal StreamVal, CheckerContext &C, ProgramStateRef State) const { @@ -1849,11 +1889,11 @@ ProgramStateRef StreamChecker::ensureStreamOpened(SVal StreamVal, if (SS->isClosed()) { // Using a stream pointer after 'fclose' causes undefined behavior // according to cppreference.com . - ExplodedNode *N = C.generateErrorNode(); - if (N) { - C.emitReport(std::make_unique<PathSensitiveBugReport>( - BT_UseAfterClose, - "Stream might be already closed. Causes undefined behaviour.", N)); + if (ExplodedNode *N = C.generateErrorNode()) { + auto R = std::make_unique<PathSensitiveBugReport>( + BT_UseAfterClose, "Use of a stream that might be already closed", N); + R->addVisitor<StreamClosedVisitor>(Sym); + C.emitReport(std::move(R)); return nullptr; } diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp index be07cf51eefb..394cb26f03cf 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp @@ -12,6 +12,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/ExprObjC.h" #include <optional> namespace clang { @@ -35,6 +36,12 @@ bool tryToFindPtrOrigin( break; } } + if (auto *POE = dyn_cast<PseudoObjectExpr>(E)) { + if (auto *RF = POE->getResultExpr()) { + E = RF; + continue; + } + } if (auto *tempExpr = dyn_cast<ParenExpr>(E)) { E = tempExpr->getSubExpr(); continue; @@ -88,7 +95,7 @@ bool tryToFindPtrOrigin( continue; } - if (isReturnValueRefCounted(callee)) + if (isRefType(callee->getReturnType())) return callback(E, true); if (isSingleton(callee)) @@ -100,6 +107,12 @@ bool tryToFindPtrOrigin( } } } + if (auto *ObjCMsgExpr = dyn_cast<ObjCMessageExpr>(E)) { + if (auto *Method = ObjCMsgExpr->getMethodDecl()) { + if (isRefType(Method->getReturnType())) + 
return callback(E, true); + } + } if (auto *unaryOp = dyn_cast<UnaryOperator>(E)) { // FIXME: Currently accepts ANY unary operator. Is it OK? E = unaryOp->getSubExpr(); diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp index f48b2fd9dca7..9da3e54e4543 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp @@ -123,9 +123,8 @@ bool isCtorOfRefCounted(const clang::FunctionDecl *F) { || FunctionName == "Identifier"; } -bool isReturnValueRefCounted(const clang::FunctionDecl *F) { - assert(F); - QualType type = F->getReturnType(); +bool isRefType(const clang::QualType T) { + QualType type = T; while (!type.isNull()) { if (auto *elaboratedT = type->getAs<ElaboratedType>()) { type = elaboratedT->desugar(); diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h index 2932e62ad06e..e2d0342bebd5 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h @@ -62,8 +62,8 @@ bool isRefType(const std::string &Name); /// false if not. bool isCtorOfRefCounted(const clang::FunctionDecl *F); -/// \returns true if \p F returns a ref-counted object, false if not. -bool isReturnValueRefCounted(const clang::FunctionDecl *F); +/// \returns true if \p T is RefPtr, Ref, or its variant, false if not. +bool isRefType(const clang::QualType T); /// \returns true if \p M is getter of a ref-counted class, false if not. 
std::optional<bool> isGetterOfRefCounted(const clang::CXXMethodDecl* Method); diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/RefCntblBaseVirtualDtorChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RefCntblBaseVirtualDtorChecker.cpp index ecba5f9aa23e..e80246f49a31 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/RefCntblBaseVirtualDtorChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RefCntblBaseVirtualDtorChecker.cpp @@ -72,7 +72,7 @@ public: if (name == "ensureOnMainThread" || name == "ensureOnMainRunLoop") { for (unsigned i = 0; i < CE->getNumArgs(); ++i) { auto *Arg = CE->getArg(i); - if (VisitLabmdaArgument(Arg)) + if (VisitLambdaArgument(Arg)) return true; } } @@ -80,17 +80,24 @@ public: return false; } - bool VisitLabmdaArgument(const Expr *E) { + bool VisitLambdaArgument(const Expr *E) { E = E->IgnoreParenCasts(); if (auto *TempE = dyn_cast<CXXBindTemporaryExpr>(E)) E = TempE->getSubExpr(); + E = E->IgnoreParenCasts(); + if (auto *Ref = dyn_cast<DeclRefExpr>(E)) { + if (auto *VD = dyn_cast_or_null<VarDecl>(Ref->getDecl())) + return VisitLambdaArgument(VD->getInit()); + return false; + } + if (auto *Lambda = dyn_cast<LambdaExpr>(E)) { + if (VisitBody(Lambda->getBody())) + return true; + } if (auto *ConstructE = dyn_cast<CXXConstructExpr>(E)) { for (unsigned i = 0; i < ConstructE->getNumArgs(); ++i) { - auto *Arg = ConstructE->getArg(i); - if (auto *Lambda = dyn_cast<LambdaExpr>(Arg)) { - if (VisitBody(Lambda->getBody())) - return true; - } + if (VisitLambdaArgument(ConstructE->getArg(i))) + return true; } } return false; diff --git a/clang/test/Analysis/Checkers/WebKit/ref-cntbl-crtp-base-no-virtual-dtor.cpp b/clang/test/Analysis/Checkers/WebKit/ref-cntbl-crtp-base-no-virtual-dtor.cpp index 01527addb529..33c60ea8ca64 100644 --- a/clang/test/Analysis/Checkers/WebKit/ref-cntbl-crtp-base-no-virtual-dtor.cpp +++ b/clang/test/Analysis/Checkers/WebKit/ref-cntbl-crtp-base-no-virtual-dtor.cpp @@ -119,6 +119,11 @@ public: 
ensureOnMainThread([this] { delete static_cast<const T*>(this); }); + } else if constexpr (destructionThread == DestructionThread::MainRunLoop) { + auto deleteThis = [this] { + delete static_cast<const T*>(this); + }; + ensureOnMainThread(deleteThis); } } @@ -230,3 +235,16 @@ public: private: FancyRefCountedClass4(); }; + +class FancyRefCountedClass5 final : public ThreadSafeRefCounted<FancyRefCountedClass5, DestructionThread::MainRunLoop> { +public: + static Ref<FancyRefCountedClass5> create() + { + return adoptRef(*new FancyRefCountedClass5()); + } + + virtual ~FancyRefCountedClass5(); + +private: + FancyRefCountedClass5(); +}; diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.mm b/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.mm index db0c5b19eec5..9ad1880e9d11 100644 --- a/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.mm +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.mm @@ -24,3 +24,20 @@ } @end + +class RefCountedObject { +public: + void ref() const; + void deref() const; + Ref<RefCountedObject> copy() const; +}; + +@interface WrapperObj : NSObject + +- (Ref<RefCountedObject>)_protectedWebExtensionControllerConfiguration; + +@end + +static void foo(WrapperObj *configuration) { + configuration._protectedWebExtensionControllerConfiguration->copy(); +} diff --git a/clang/test/Analysis/stream-error.c b/clang/test/Analysis/stream-error.c index 3f791d133464..9de56c082e82 100644 --- a/clang/test/Analysis/stream-error.c +++ b/clang/test/Analysis/stream-error.c @@ -96,7 +96,7 @@ void error_fread(void) { } } fclose(F); - Ret = fread(Buf, 1, 10, F); // expected-warning {{Stream might be already closed}} + Ret = fread(Buf, 1, 10, F); // expected-warning {{Use of a stream that might be already closed}} } void error_fwrite(void) { @@ -113,7 +113,7 @@ void error_fwrite(void) { fwrite(0, 1, 10, F); // expected-warning {{might be 'indeterminate'}} } fclose(F); - Ret = fwrite(0, 1, 10, F); // expected-warning {{Stream might be 
already closed}} + Ret = fwrite(0, 1, 10, F); // expected-warning {{Use of a stream that might be already closed}} } void error_fgetc(void) { @@ -135,7 +135,7 @@ void error_fgetc(void) { } } fclose(F); - fgetc(F); // expected-warning {{Stream might be already closed}} + fgetc(F); // expected-warning {{Use of a stream that might be already closed}} } void error_fgets(void) { @@ -158,7 +158,7 @@ void error_fgets(void) { } } fclose(F); - fgets(Buf, sizeof(Buf), F); // expected-warning {{Stream might be already closed}} + fgets(Buf, sizeof(Buf), F); // expected-warning {{Use of a stream that might be already closed}} } void error_fputc(int fd) { @@ -176,7 +176,7 @@ void error_fputc(int fd) { fputc('Y', F); // no-warning } fclose(F); - fputc('A', F); // expected-warning {{Stream might be already closed}} + fputc('A', F); // expected-warning {{Use of a stream that might be already closed}} } void error_fputs(void) { @@ -194,7 +194,7 @@ void error_fputs(void) { fputs("QWD", F); // expected-warning {{might be 'indeterminate'}} } fclose(F); - fputs("ABC", F); // expected-warning {{Stream might be already closed}} + fputs("ABC", F); // expected-warning {{Use of a stream that might be already closed}} } void error_fprintf(void) { @@ -211,7 +211,7 @@ void error_fprintf(void) { fprintf(F, "bbb"); // expected-warning {{might be 'indeterminate'}} } fclose(F); - fprintf(F, "ccc"); // expected-warning {{Stream might be already closed}} + fprintf(F, "ccc"); // expected-warning {{Use of a stream that might be already closed}} } void error_fscanf(int *A) { @@ -236,7 +236,7 @@ void error_fscanf(int *A) { } } fclose(F); - fscanf(F, "ccc"); // expected-warning {{Stream might be already closed}} + fscanf(F, "ccc"); // expected-warning {{Use of a stream that might be already closed}} } void error_ungetc(int TestIndeterminate) { @@ -256,7 +256,7 @@ void error_ungetc(int TestIndeterminate) { ungetc('X', F); // expected-warning {{might be 'indeterminate'}} } fclose(F); - ungetc('A', F); // 
expected-warning {{Stream might be already closed}} + ungetc('A', F); // expected-warning {{Use of a stream that might be already closed}} } void error_getdelim(char *P, size_t Sz) { @@ -278,7 +278,7 @@ void error_getdelim(char *P, size_t Sz) { } } fclose(F); - getdelim(&P, &Sz, '\n', F); // expected-warning {{Stream might be already closed}} + getdelim(&P, &Sz, '\n', F); // expected-warning {{Use of a stream that might be already closed}} } void error_getline(char *P, size_t Sz) { @@ -300,7 +300,7 @@ void error_getline(char *P, size_t Sz) { } } fclose(F); - getline(&P, &Sz, F); // expected-warning {{Stream might be already closed}} + getline(&P, &Sz, F); // expected-warning {{Use of a stream that might be already closed}} } void write_after_eof_is_allowed(void) { diff --git a/clang/test/Analysis/stream-note.c b/clang/test/Analysis/stream-note.c index 3aef707d5005..2b5d1edb2814 100644 --- a/clang/test/Analysis/stream-note.c +++ b/clang/test/Analysis/stream-note.c @@ -264,3 +264,12 @@ void error_fseek_read_eof(void) { fgetc(F); // no warning fclose(F); } + +void check_note_at_use_after_close(void) { + FILE *F = tmpfile(); + if (!F) // expected-note {{'F' is non-null}} expected-note {{Taking false branch}} + return; + fclose(F); // expected-note {{Stream is closed here}} + rewind(F); // expected-warning {{Use of a stream that might be already closed}} + // expected-note@-1 {{Use of a stream that might be already closed}} +} diff --git a/clang/test/Analysis/stream.c b/clang/test/Analysis/stream.c index b9a5b1ba8cd4..758b40cca493 100644 --- a/clang/test/Analysis/stream.c +++ b/clang/test/Analysis/stream.c @@ -185,7 +185,7 @@ void f_double_close(void) { if (!p) return; fclose(p); - fclose(p); // expected-warning {{Stream might be already closed}} + fclose(p); // expected-warning {{Use of a stream that might be already closed}} } void f_double_close_alias(void) { @@ -194,7 +194,7 @@ void f_double_close_alias(void) { return; FILE *p2 = p1; fclose(p1); - fclose(p2); // 
expected-warning {{Stream might be already closed}} + fclose(p2); // expected-warning {{Use of a stream that might be already closed}} } void f_use_after_close(void) { @@ -202,7 +202,7 @@ void f_use_after_close(void) { if (!p) return; fclose(p); - clearerr(p); // expected-warning {{Stream might be already closed}} + clearerr(p); // expected-warning {{Use of a stream that might be already closed}} } void f_open_after_close(void) { @@ -266,7 +266,7 @@ void check_freopen_2(void) { if (f2) { // Check if f1 and f2 point to the same stream. fclose(f1); - fclose(f2); // expected-warning {{Stream might be already closed.}} + fclose(f2); // expected-warning {{Use of a stream that might be already closed}} } else { // Reopen failed. // f1 is non-NULL but points to a possibly invalid stream. @@ -370,7 +370,7 @@ void fflush_after_fclose(void) { if ((Ret = fflush(F)) != 0) clang_analyzer_eval(Ret == EOF); // expected-warning {{TRUE}} fclose(F); - fflush(F); // expected-warning {{Stream might be already closed}} + fflush(F); // expected-warning {{Use of a stream that might be already closed}} } void fflush_on_open_failed_stream(void) { diff --git a/clang/test/C/C23/n3030.c b/clang/test/C/C23/n3030.c new file mode 100644 index 000000000000..9e1405a2e0e1 --- /dev/null +++ b/clang/test/C/C23/n3030.c @@ -0,0 +1,93 @@ +// RUN: %clang_cc1 -verify -triple x86_64-unknown-linux-gnu -fsyntax-only -std=c23 %s -pedantic -Wall + +#include <limits.h> + +enum us : unsigned short { + us_max = USHRT_MAX, + us_violation, // expected-error {{enumerator value 65536 is not representable in the underlying type 'unsigned short'}} + us_violation_2 = us_max + 1, // expected-error {{enumerator value is not representable in the underlying type 'unsigned short'}} + us_wrap_around_to_zero = (unsigned short)(USHRT_MAX + 1) /* Okay: conversion + done in constant expression before conversion to + underlying type: unsigned semantics okay. 
*/ +}; + +enum ui : unsigned int { + ui_max = UINT_MAX, + ui_violation, // expected-error {{enumerator value 4294967296 is not representable in the underlying type 'unsigned int'}} + ui_no_violation = ui_max + 1, + ui_wrap_around_to_zero = (unsigned int)(UINT_MAX + 1) +}; + +enum E1 : short; +enum E2 : short; // expected-note {{previous}} +enum E3; // expected-warning {{ISO C forbids forward references to 'enum' types}} +enum E4 : unsigned long long; + +enum E1 : short { m11, m12 }; +enum E1 x = m11; + +enum E2 : long { // expected-error {{enumeration redeclared with different underlying type 'long' (was 'short')}} + m21, + m22 +}; + +enum E3 { // expected-note {{definition of 'enum E3' is not complete until the closing '}'}} + // expected-note@-1 {{previous}} + m31, + m32, + m33 = sizeof(enum E3) // expected-error {{invalid application of 'sizeof' to an incomplete type 'enum E3'}} +}; +enum E3 : int; // expected-error {{enumeration previously declared with nonfixed underlying type}} + +enum E4 : unsigned long long { + m40 = sizeof(enum E4), + m41 = ULLONG_MAX, + m42 // expected-error {{enumerator value 18446744073709551616 is not representable in the underlying type 'unsigned long long'}} +}; + +enum E5 y; // expected-error {{tentative definition has type 'enum E5' that is never completed}} + // expected-warning@-1 {{ISO C forbids forward references to 'enum' types}} + // expected-note@-2 {{forward declaration of 'enum E5'}} +enum E6 : long int z; // expected-error {{non-defining declaration of enumeration with a fixed underlying type is only permitted as a standalone declaration; missing list of enumerators?}} +enum E7 : long int = 0; // expected-error {{non-defining declaration of enumeration with a fixed underlying type is only permitted as a standalone declaration; missing list of enumerators?}} + // expected-error@-1 {{expected identifier or '('}} + +enum underlying : unsigned char { b0 }; + +constexpr int a = _Generic(b0, int: 2, unsigned char: 1, default: 
0); +constexpr int b = _Generic((enum underlying)b0, int: 2, unsigned char: 1, default: 0); +static_assert(a == 1); +static_assert(b == 1); + +void f1(enum a : long b); // expected-error {{non-defining declaration of enumeration with a fixed underlying type is only permitted as a standalone declaration; missing list of enumerators?}} + // expected-warning@-1 {{declaration of 'enum a' will not be visible outside of this function}} +void f2(enum c : long{x} d); // expected-warning {{declaration of 'enum c' will not be visible outside of this function}} +enum e : int f3(); // expected-error {{non-defining declaration of enumeration with a fixed underlying type is only permitted as a standalone declaration; missing list of enumerators?}} + +typedef enum t u; // expected-warning {{ISO C forbids forward references to 'enum' types}} +typedef enum v : short W; // expected-error {{non-defining declaration of enumeration with a fixed underlying type is only permitted as a standalone declaration; missing list of enumerators?}} +typedef enum q : short { s } R; + +struct s1 { + int x; + enum e:int : 1; // expected-error {{non-defining declaration of enumeration with a fixed underlying type is only permitted as a standalone declaration; missing list of enumerators?}} + int y; +}; + +enum forward; // expected-warning {{ISO C forbids forward references to 'enum' types}} +extern enum forward fwd_val0; /* Constraint violation: incomplete type */ +extern enum forward *fwd_ptr0; // expected-note {{previous}} +extern int + *fwd_ptr0; // expected-error {{redeclaration of 'fwd_ptr0' with a different type: 'int *' vs 'enum forward *'}} + +enum forward1 : int; +extern enum forward1 fwd_val1; +extern int fwd_val1; +extern enum forward1 *fwd_ptr1; +extern int *fwd_ptr1; + +enum ee1 : short; +enum e : short f = 0; // expected-error {{non-defining declaration of enumeration with a fixed underlying type is only permitted as a standalone declaration; missing list of enumerators?}} +enum g : 
short { yyy } h = yyy; + +enum ee2 : typeof ((enum ee3 : short { A })0, (short)0); diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt index 299a35723b59..2d84b0d73053 100644 --- a/clang/test/CMakeLists.txt +++ b/clang/test/CMakeLists.txt @@ -72,7 +72,6 @@ list(APPEND CLANG_TEST_DEPS clang-tblgen clang-offload-bundler clang-import-test - clang-rename clang-refactor clang-diff clang-installapi diff --git a/clang/test/CodeGen/X86/avx512copy-builtins.c b/clang/test/CodeGen/X86/avx512copy-builtins.c new file mode 100644 index 000000000000..06f7507bde53 --- /dev/null +++ b/clang/test/CodeGen/X86/avx512copy-builtins.c @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 \ +// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s + +#include <immintrin.h> +#include <stddef.h> + +__m128i test_mm_move_epi32(__m128i A) { + // CHECK-LABEL: test_mm_move_epi32 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 4, i32 4> + return _mm_move_epi32(A); +} + +__m128i test_mm_move_epi16(__m128i A) { + // CHECK-LABEL: test_mm_move_epi16 + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> + return _mm_move_epi16(A); +} diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c index 8fb52992c0fe..7e094a52653e 100644 --- a/clang/test/CodeGen/builtins-elementwise-math.c +++ b/clang/test/CodeGen/builtins-elementwise-math.c @@ -570,6 +570,43 @@ void test_builtin_elementwise_log2(float f1, float f2, double d1, double d2, vf2 = __builtin_elementwise_log2(vf1); } +void test_builtin_elementwise_popcount(si8 vi1, si8 vi2, + long long int i1, long long int i2, short si, + _BitInt(31) bi1, _BitInt(31) bi2) { + + + // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8 + // CHECK-NEXT: call 
i64 @llvm.ctpop.i64(i64 [[I1]]) + i2 = __builtin_elementwise_popcount(i1); + + // CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16 + // CHECK-NEXT: call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[VI1]]) + vi2 = __builtin_elementwise_popcount(vi1); + + // CHECK: [[CVI2:%.+]] = load <8 x i16>, ptr %cvi2, align 16 + // CHECK-NEXT: call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[CVI2]]) + const si8 cvi2 = vi2; + vi2 = __builtin_elementwise_popcount(cvi2); + + // CHECK: [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4 + // CHECK-NEXT: [[LOADEDV:%.+]] = trunc i32 [[BI1]] to i31 + // CHECK-NEXT: call i31 @llvm.ctpop.i31(i31 [[LOADEDV]]) + bi2 = __builtin_elementwise_popcount(bi1); + + // CHECK: [[IA1:%.+]] = load i32, ptr addrspace(1) @int_as_one, align 4 + // CHECK-NEXT: call i32 @llvm.ctpop.i32(i32 [[IA1]]) + b = __builtin_elementwise_popcount(int_as_one); + + // CHECK: call i32 @llvm.ctpop.i32(i32 -10) + b = __builtin_elementwise_popcount(-10); + + // CHECK: [[SI:%.+]] = load i16, ptr %si.addr, align 2 + // CHECK-NEXT: [[SI_EXT:%.+]] = sext i16 [[SI]] to i32 + // CHECK-NEXT: [[RES:%.+]] = call i32 @llvm.ctpop.i32(i32 [[SI_EXT]]) + // CHECK-NEXT: = trunc i32 [[RES]] to i16 + si = __builtin_elementwise_popcount(si); +} + void test_builtin_elementwise_pow(float f1, float f2, double d1, double d2, float4 vf1, float4 vf2) { diff --git a/clang/test/CodeGenCoroutines/coro-await-elidable.cpp b/clang/test/CodeGenCoroutines/coro-await-elidable.cpp index 8512995dfad4..deb19b4a5004 100644 --- a/clang/test/CodeGenCoroutines/coro-await-elidable.cpp +++ b/clang/test/CodeGenCoroutines/coro-await-elidable.cpp @@ -84,4 +84,44 @@ Task<int> nonelidable() { co_return 1; } +// CHECK-LABEL: define{{.*}} @_Z8addTasksO4TaskIiES1_{{.*}} { +Task<int> addTasks([[clang::coro_await_elidable_argument]] Task<int> &&t1, Task<int> &&t2) { + int i1 = co_await t1; + int i2 = co_await t2; + co_return i1 + i2; +} + +// CHECK-LABEL: define{{.*}} @_Z10returnSamei{{.*}} { +Task<int> returnSame(int i) { + 
co_return i; +} + +// CHECK-LABEL: define{{.*}} @_Z21elidableWithMustAwaitv{{.*}} { +Task<int> elidableWithMustAwait() { + // CHECK: call void @_Z10returnSamei(ptr {{.*}}, i32 noundef 2) #[[ELIDE_SAFE]] + // CHECK: call void @_Z10returnSamei(ptr {{.*}}, i32 noundef 3){{$}} + co_return co_await addTasks(returnSame(2), returnSame(3)); +} + +template <typename... Args> +Task<int> sumAll([[clang::coro_await_elidable_argument]] Args && ... tasks); + +// CHECK-LABEL: define{{.*}} @_Z16elidableWithPackv{{.*}} { +Task<int> elidableWithPack() { + // CHECK: call void @_Z10returnSamei(ptr {{.*}}, i32 noundef 1){{$}} + // CHECK: call void @_Z10returnSamei(ptr {{.*}}, i32 noundef 2) #[[ELIDE_SAFE]] + // CHECK: call void @_Z10returnSamei(ptr {{.*}}, i32 noundef 3) #[[ELIDE_SAFE]] + auto t = returnSame(1); + co_return co_await sumAll(t, returnSame(2), returnSame(3)); +} + + +// CHECK-LABEL: define{{.*}} @_Z25elidableWithPackRecursivev{{.*}} { +Task<int> elidableWithPackRecursive() { + // CHECK: call void @_Z10returnSamei(ptr {{.*}}, i32 noundef 1) #[[ELIDE_SAFE]] + // CHECK: call void @_Z10returnSamei(ptr {{.*}}, i32 noundef 2){{$}} + // CHECK: call void @_Z10returnSamei(ptr {{.*}}, i32 noundef 3) #[[ELIDE_SAFE]] + co_return co_await sumAll(addTasks(returnSame(1), returnSame(2)), returnSame(3)); +} + // CHECK: attributes #[[ELIDE_SAFE]] = { coro_elide_safe } diff --git a/clang/test/CodeGenHLSL/builtins/countbits.hlsl b/clang/test/CodeGenHLSL/builtins/countbits.hlsl new file mode 100644 index 000000000000..8dfe977bfae6 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/countbits.hlsl @@ -0,0 +1,80 @@ +// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ +// RUN: -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s + +#ifdef __HLSL_ENABLE_16_BIT +// CHECK-LABEL: test_countbits_ushort +// CHECK: call i16 @llvm.ctpop.i16 +uint16_t test_countbits_ushort(uint16_t p0) +{ + return countbits(p0); +} 
+// CHECK-LABEL: test_countbits_ushort2 +// CHECK: call <2 x i16> @llvm.ctpop.v2i16 +uint16_t2 test_countbits_ushort2(uint16_t2 p0) +{ + return countbits(p0); +} +// CHECK-LABEL: test_countbits_ushort3 +// CHECK: call <3 x i16> @llvm.ctpop.v3i16 +uint16_t3 test_countbits_ushort3(uint16_t3 p0) +{ + return countbits(p0); +} +// CHECK-LABEL: test_countbits_ushort4 +// CHECK: call <4 x i16> @llvm.ctpop.v4i16 +uint16_t4 test_countbits_ushort4(uint16_t4 p0) +{ + return countbits(p0); +} +#endif + +// CHECK-LABEL: test_countbits_uint +// CHECK: call i32 @llvm.ctpop.i32 +int test_countbits_uint(uint p0) +{ + return countbits(p0); +} +// CHECK-LABEL: test_countbits_uint2 +// CHECK: call <2 x i32> @llvm.ctpop.v2i32 +uint2 test_countbits_uint2(uint2 p0) +{ + return countbits(p0); +} +// CHECK-LABEL: test_countbits_uint3 +// CHECK: call <3 x i32> @llvm.ctpop.v3i32 +uint3 test_countbits_uint3(uint3 p0) +{ + return countbits(p0); +} +// CHECK-LABEL: test_countbits_uint4 +// CHECK: call <4 x i32> @llvm.ctpop.v4i32 +uint4 test_countbits_uint4(uint4 p0) +{ + return countbits(p0); +} + +// CHECK-LABEL: test_countbits_long +// CHECK: call i64 @llvm.ctpop.i64 +uint64_t test_countbits_long(uint64_t p0) +{ + return countbits(p0); +} +// CHECK-LABEL: test_countbits_long2 +// CHECK: call <2 x i64> @llvm.ctpop.v2i64 +uint64_t2 test_countbits_long2(uint64_t2 p0) +{ + return countbits(p0); +} +// CHECK-LABEL: test_countbits_long3 +// CHECK: call <3 x i64> @llvm.ctpop.v3i64 +uint64_t3 test_countbits_long3(uint64_t3 p0) +{ + return countbits(p0); +} +// CHECK-LABEL: test_countbits_long4 +// CHECK: call <4 x i64> @llvm.ctpop.v4i64 +uint64_t4 test_countbits_long4(uint64_t4 p0) +{ + return countbits(p0); +} diff --git a/clang/test/CodeGenHLSL/wavesize.hlsl b/clang/test/CodeGenHLSL/wavesize.hlsl new file mode 100644 index 000000000000..fcb817ce0d06 --- /dev/null +++ b/clang/test/CodeGenHLSL/wavesize.hlsl @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ 
+// RUN: dxil-pc-shadermodel6.6-compute %s -DSM66 -hlsl-entry foo \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s + +// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.8-compute %s -DNO_PREFERR -hlsl-entry foo \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefix=NO_PREFERR + +// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.8-compute %s -hlsl-entry foo \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefix=CHECK-SM68 + + +// Make sure wavesize attribute get correct value for sm66 and sm68. +// CHECK:define void @foo() +// CHECK:"hlsl.wavesize"="8,0,0" + +// NO_PREFERR:define void @foo() +// NO_PREFERR:"hlsl.wavesize"="8,128,0" + +// CHECK-SM68:define void @foo() +// CHECK-SM68:"hlsl.wavesize"="8,128,64" + +[numthreads(16,8,1)] +#ifdef SM66 +[WaveSize(8)] +#elif NO_PREFERR +[WaveSize(8, 128)] +#else +[WaveSize(8, 128, 64)] +#endif +void foo() { + +} diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c index ef6cbdded6a6..f6e2245dcdbc 100644 --- a/clang/test/Driver/openmp-offload-gpu.c +++ b/clang/test/Driver/openmp-offload-gpu.c @@ -377,4 +377,4 @@ // RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ // RUN: --offload-arch=sm_52 -nogpulibc -nogpuinc %s 2>&1 \ // RUN: | FileCheck --check-prefix=LIBC-GPU %s -// LIBC-GPU: clang-linker-wrapper{{.*}}"--device-compiler=-nolibc" +// LIBC-GPU-NOT: clang-linker-wrapper{{.*}}"--device-linker" diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index baa1816358b1..914f94c08a9f 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -60,6 +60,7 @@ // CHECK-NEXT: ConsumableSetOnRead (SubjectMatchRule_record) // CHECK-NEXT: Convergent 
(SubjectMatchRule_function) // CHECK-NEXT: CoroAwaitElidable (SubjectMatchRule_record) +// CHECK-NEXT: CoroAwaitElidableArgument (SubjectMatchRule_variable_is_parameter) // CHECK-NEXT: CoroDisableLifetimeBound (SubjectMatchRule_function) // CHECK-NEXT: CoroLifetimeBound (SubjectMatchRule_record) // CHECK-NEXT: CoroOnlyDestroyWhenComplete (SubjectMatchRule_record) diff --git a/clang/test/Modules/pr108732.cppm b/clang/test/Modules/pr108732.cppm new file mode 100644 index 000000000000..f3b495aa826c --- /dev/null +++ b/clang/test/Modules/pr108732.cppm @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -std=c++20 %s -ast-dump | FileCheck %s +export module mod; + +extern "C++" { +class C +{ +public: +bool foo() const { + return true; +} +}; +} + +// CHECK: foo {{.*}}implicit-inline diff --git a/clang/test/PCH/race-condition.cpp b/clang/test/PCH/race-condition.cpp new file mode 100644 index 000000000000..752b0cc3ff62 --- /dev/null +++ b/clang/test/PCH/race-condition.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -fallow-pch-with-compiler-errors -std=c++20 -x c++-header -emit-pch %s -o %t -verify +// RUN: %clang_cc1 -fallow-pch-with-compiler-errors -std=c++20 -include-pch %t %s -verify +#ifndef HEADER_H +#define HEADER_H + +#include "bad_include.h" +// expected-error@6{{'bad_include.h' file not found}} + +template <bool, class = void> struct enable_if {}; +template <class T> struct enable_if<true, T> { typedef T type; }; +template <bool B, class T = void> using enable_if_t = typename enable_if<B, T>::type; + +template <typename> struct meta { static constexpr int value = 0; }; +template <> struct meta<int> { static constexpr int value = 1; }; +template <> struct meta<float> { static constexpr int value = 2; }; + +namespace N { +inline namespace inner { + +template <class T> +constexpr enable_if_t<meta<T>::value == 0, void> midpoint(T) {} + +template <class U> +constexpr enable_if_t<meta<U>::value == 1, void> midpoint(U) {} + +template <class F> +constexpr enable_if_t<meta<F>::value == 2, 
void> midpoint(F) {} + +} // namespace inner +} // namespace N + +#else + +// expected-error@27{{'N::midpoint' has different definitions in different modules; defined here first difference is 1st parameter with type 'F'}} +// expected-error@24{{'N::midpoint' has different definitions in different modules; defined here first difference is 1st parameter with type 'U'}} +// expected-note@21{{but in '' found 1st parameter with type 'T'}} +int x = N::something; +// expected-error@37{{no member named 'something' in namespace 'N'}} +// expected-note@21{{but in '' found 1st parameter with type 'T'}} + +#endif diff --git a/clang/test/Parser/cxx-bad-cast-diagnose-broken-template.cpp b/clang/test/Parser/cxx-bad-cast-diagnose-broken-template.cpp new file mode 100644 index 000000000000..3500975d9369 --- /dev/null +++ b/clang/test/Parser/cxx-bad-cast-diagnose-broken-template.cpp @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -fcxx-exceptions -fexceptions -fsyntax-only -verify %s + +template<typename> +struct StringTrait {}; + +template< int N > +struct StringTrait< const char[ N ] > { + typedef char CharType; + static const MissingIntT length = N - 1; // expected-error {{unknown type name 'MissingIntT'}} +}; + +class String { +public: + template <typename T> + String(T& str, typename StringTrait<T>::CharType = 0); +}; + + +class Exception { +public: + Exception(String const&); +}; + +void foo() { + throw Exception("some error"); // expected-error {{functional-style cast from 'const char[11]' to 'Exception' is not allowed}} +} diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c index 628274380ae5..1727be1d6286 100644 --- a/clang/test/Sema/builtins-elementwise-math.c +++ b/clang/test/Sema/builtins-elementwise-math.c @@ -505,6 +505,39 @@ void test_builtin_elementwise_log2(int i, float f, double d, float4 v, int3 iv, // expected-error@-1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}} } 
+void test_builtin_elementwise_popcount(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) { + + struct Foo s = __builtin_elementwise_popcount(i); + // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}} + + i = __builtin_elementwise_popcount(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} + + i = __builtin_elementwise_popcount(f); + // expected-error@-1 {{1st argument must be a vector of integers (was 'float')}} + + i = __builtin_elementwise_popcount(f, f); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} + + u = __builtin_elementwise_popcount(d); + // expected-error@-1 {{1st argument must be a vector of integers (was 'double')}} + + v = __builtin_elementwise_popcount(v); + // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}} + + int2 i2 = __builtin_elementwise_popcount(iv); + // expected-error@-1 {{initializing 'int2' (vector of 2 'int' values) with an expression of incompatible type 'int3' (vector of 3 'int' values)}} + + iv = __builtin_elementwise_popcount(i2); + // expected-error@-1 {{assigning to 'int3' (vector of 3 'int' values) from incompatible type 'int2' (vector of 2 'int' values)}} + + unsigned3 u3 = __builtin_elementwise_popcount(iv); + // expected-error@-1 {{initializing 'unsigned3' (vector of 3 'unsigned int' values) with an expression of incompatible type 'int3' (vector of 3 'int' values)}} + + iv = __builtin_elementwise_popcount(u3); + // expected-error@-1 {{assigning to 'int3' (vector of 3 'int' values) from incompatible type 'unsigned3' (vector of 3 'unsigned int' values)}} +} + void test_builtin_elementwise_pow(int i, short s, double d, float4 v, int3 iv, unsigned3 uv, int *p) { i = __builtin_elementwise_pow(p, d); // expected-error@-1 {{arguments are of different types ('int *' vs 'double')}} diff --git a/clang/test/Sema/countbits-errors.hlsl 
b/clang/test/Sema/countbits-errors.hlsl new file mode 100644 index 000000000000..0fd36fe78d79 --- /dev/null +++ b/clang/test/Sema/countbits-errors.hlsl @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -finclude-default-header +// -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only +// -disable-llvm-passes -verify + +double2 test_int_builtin(double2 p0) { + return __builtin_hlsl_elementwise_countbits(p0); + // expected-error@-1 {{passing 'double2' (aka 'vector<double, 2>') to + // parameter of incompatible type + // '__attribute__((__vector_size__(2 * sizeof(int)))) int' + // (vector of 2 'int' values)}} +} + +float test_ambiguous(float p0) { + return countbits(p0); + // expected-error@-1 {{call to 'countbits' is ambiguous}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} +} + +float test_float_builtin(float p0) { + return __builtin_hlsl_elementwise_countbits(p0); + // expected-error@-1 {{passing 'double' to parameter of incompatible type + // 'int'}} +} diff --git a/clang/test/Sema/fixed-enum.c b/clang/test/Sema/fixed-enum.c index 954ff8c452b8..2b02def0e178 100644 --- a/clang/test/Sema/fixed-enum.c +++ b/clang/test/Sema/fixed-enum.c @@ -5,9 +5,9 @@ // RUN: %clang_cc1 -pedantic -std=c11 -xc -DC11 -verify %s // RUN: %clang_cc1 -Weverything -std=c11 -xc -fms-extensions -DMS -verify %s // RUN: %clang_cc1 -Weverything -std=c2x -xc -DC23 -verify %s -// RUN: %clang_cc1 -pedantic -std=c2x -xc -DC23 -verify %s +// RUN: %clang_cc1 -pedantic -std=c2x -xc -DC23 -verify -Wpre-c23-compat %s // RUN: %clang_cc1 -Weverything -std=c23 -xc -DC23 -verify %s -// RUN: %clang_cc1 -pedantic -std=c23 -xc -DC23 -verify %s +// RUN: 
%clang_cc1 -pedantic -std=c23 -xc -DC23 -verify -Wpre-c23-compat %s // RUN: %clang_cc1 -Weverything -std=c23 -xc -fms-extensions -DC23 -verify %s enum X : int {e}; @@ -15,12 +15,14 @@ enum X : int {e}; // expected-warning@-2{{enumeration types with a fixed underlying type are incompatible with C++98}} #elif defined(CXX03) // expected-warning@-4{{enumeration types with a fixed underlying type are a C++11 extension}} -#elif defined(OBJC) || defined(C23) -// No diagnostic +#elif defined(OBJC) +// diagnostic +#elif defined(C23) +// expected-warning@-8{{enumeration types with a fixed underlying type are incompatible with C standards before C23}} #elif defined(C11) -// expected-warning@-8{{enumeration types with a fixed underlying type are a Clang extension}} +// expected-warning@-10{{enumeration types with a fixed underlying type are a C23 extension}} #elif defined(MS) -// expected-warning@-10{{enumeration types with a fixed underlying type are a Microsoft extension}} +// expected-warning@-12{{enumeration types with a fixed underlying type are a Microsoft extension}} #endif // Don't warn about the forward declaration in any language mode. @@ -29,16 +31,23 @@ enum Fwd : int { e2 }; #if !defined(OBJC) && !defined(C23) // expected-warning@-3 {{enumeration types with a fixed underlying type}} // expected-warning@-3 {{enumeration types with a fixed underlying type}} +#elif defined(C23) +// expected-warning@-6 {{enumeration types with a fixed underlying type are incompatible with C standards before C23}} +// expected-warning@-6 {{enumeration types with a fixed underlying type are incompatible with C standards before C23}} #endif // Always error on the incompatible redeclaration. 
enum BadFwd : int; #if !defined(OBJC) && !defined(C23) // expected-warning@-2 {{enumeration types with a fixed underlying type}} +#elif defined(C23) +// expected-warning@-4 {{enumeration types with a fixed underlying type are incompatible with C standards before C23}} #endif -// expected-note@-4 {{previous declaration is here}} +// expected-note@-6 {{previous declaration is here}} enum BadFwd : char { e3 }; #if !defined(OBJC) && !defined(C23) // expected-warning@-2 {{enumeration types with a fixed underlying type}} +#elif defined(C23) +// expected-warning@-4 {{enumeration types with a fixed underlying type are incompatible with C standards before C23}} #endif -// expected-error@-4 {{enumeration redeclared with different underlying type 'char' (was 'int')}} +// expected-error@-6 {{enumeration redeclared with different underlying type 'char' (was 'int')}} diff --git a/clang/test/SemaCXX/builtins-elementwise-math.cpp b/clang/test/SemaCXX/builtins-elementwise-math.cpp index 898d869f4c81..c3d8bc593c0b 100644 --- a/clang/test/SemaCXX/builtins-elementwise-math.cpp +++ b/clang/test/SemaCXX/builtins-elementwise-math.cpp @@ -269,3 +269,11 @@ void test_builtin_elementwise_bitreverse() { static_assert(!is_const<decltype(__builtin_elementwise_bitreverse(a))>::value); static_assert(!is_const<decltype(__builtin_elementwise_bitreverse(b))>::value); } + +void test_builtin_elementwise_popcount() { + const int a = 2; + int b = 1; + static_assert(!is_const<decltype(__builtin_elementwise_popcount(a))>::value); + static_assert(!is_const<decltype(__builtin_elementwise_popcount(b))>::value); +} + diff --git a/clang/test/SemaCXX/cxx2a-consteval.cpp b/clang/test/SemaCXX/cxx2a-consteval.cpp index 81923617f637..ae331055c52b 100644 --- a/clang/test/SemaCXX/cxx2a-consteval.cpp +++ b/clang/test/SemaCXX/cxx2a-consteval.cpp @@ -1248,3 +1248,27 @@ void test() { } } + +// Test that we don't redundantly instantiate the friend declaration in +// RemoveNestedImmediateInvocation(). 
Otherwise, we would end up with spurious +// redefinition errors. +namespace GH107175 { + +consteval void consteval_func() {} + +template <auto> struct define_f { + friend void foo() {} +}; + +template <auto = [] {}> struct A {}; + +struct B { + template <auto T> consteval void func() { (void)define_f<T>{}; } +}; + +int main() { + B{}.func<A{}>(); + consteval_func(); +} + +} // namespace GH107175 diff --git a/clang/test/SemaHLSL/BuiltIns/countbits-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/countbits-errors.hlsl new file mode 100644 index 000000000000..8d5f0abb2860 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/countbits-errors.hlsl @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 -finclude-default-header +// -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only +// -disable-llvm-passes -verify -verify-ignore-unexpected + + +double test_int_builtin(double p0) { + return countbits(p0); + // expected-error@-1 {{call to 'countbits' is ambiguous}} +} + +double2 test_int_builtin_2(double2 p0) { + return __builtin_elementwise_popcount(p0); + // expected-error@-1 {{1st argument must be a vector of integers + // (was 'double2' (aka 'vector<double, 2>'))}} +} + +double test_int_builtin_3(float p0) { + return __builtin_elementwise_popcount(p0); + // expected-error@-1 {{1st argument must be a vector of integers + // (was 'float')}} +} diff --git a/clang/test/SemaHLSL/resource_binding_attr_error.hlsl b/clang/test/SemaHLSL/resource_binding_attr_error.hlsl index cb728dca838c..74aff79f0e37 100644 --- a/clang/test/SemaHLSL/resource_binding_attr_error.hlsl +++ b/clang/test/SemaHLSL/resource_binding_attr_error.hlsl @@ -13,16 +13,9 @@ float a : register(c0); cbuffer b : register(i0) { } -// expected-error@+1 {{invalid space specifier 's2' used; expected 'space' followed by an integer, like space1}} -cbuffer c : register(b0, s2) { -} // expected-error@+1 {{register number should be an integer}} -cbuffer d : register(bf, s2) { - -} -// expected-error@+1 {{invalid space 
specifier 'spaces' used; expected 'space' followed by an integer, like space1}} -cbuffer e : register(b2, spaces) { +cbuffer c : register(bf, s2) { } @@ -35,9 +28,8 @@ cbuffer B : register(space1) {} // expected-error@+1 {{wrong argument format for hlsl attribute, use b2 instead}} cbuffer C : register(b 2) {} -// expected-error@+2 {{wrong argument format for hlsl attribute, use b2 instead}} -// expected-error@+1 {{wrong argument format for hlsl attribute, use space3 instead}} -cbuffer D : register(b 2, space 3) {} +// expected-error@+1 {{wrong argument format for hlsl attribute, use b2 instead}} +cbuffer D : register(b 2, space3) {} // expected-error@+1 {{'register' attribute only applies to cbuffer/tbuffer and external global variables}} static MyTemplatedSRV<float> U : register(u5); @@ -61,9 +53,6 @@ void foo2() { extern MyTemplatedSRV<float> U2 : register(u5); } -// expected-error@+1 {{binding type 'u' only applies to UAV resources}} -float b : register(u0, space1); - // expected-error@+1 {{'register' attribute only applies to cbuffer/tbuffer and external global variables}} void bar(MyTemplatedSRV<float> U : register(u3)) { diff --git a/clang/test/SemaHLSL/resource_binding_attr_error_basic.hlsl b/clang/test/SemaHLSL/resource_binding_attr_error_basic.hlsl index 0a547ed66af0..760c057630a7 100644 --- a/clang/test/SemaHLSL/resource_binding_attr_error_basic.hlsl +++ b/clang/test/SemaHLSL/resource_binding_attr_error_basic.hlsl @@ -3,37 +3,40 @@ // expected-error@+1{{binding type 't' only applies to SRV resources}}
float f1 : register(t0);
-
-float f2 : register(c0);
+// expected-error@+1 {{binding type 'u' only applies to UAV resources}}
+float f2 : register(u0);
// expected-error@+1{{binding type 'b' only applies to constant buffers. The 'bool constant' binding type is no longer supported}}
float f3 : register(b9);
+// expected-error@+1 {{binding type 's' only applies to sampler state}}
+float f4 : register(s0);
+
// expected-error@+1{{binding type 'i' ignored. The 'integer constant' binding type is no longer supported}}
-float f4 : register(i9);
+float f5 : register(i9);
// expected-error@+1{{binding type 'x' is invalid}}
-float f5 : register(x9);
+float f6 : register(x9);
cbuffer g_cbuffer1 {
// expected-error@+1{{binding type 'c' ignored in buffer declaration. Did you mean 'packoffset'?}}
- float f6 : register(c2);
+ float f7 : register(c2);
};
tbuffer g_tbuffer1 {
// expected-error@+1{{binding type 'c' ignored in buffer declaration. Did you mean 'packoffset'?}}
- float f7 : register(c2);
+ float f8 : register(c2);
};
cbuffer g_cbuffer2 {
// expected-error@+1{{binding type 'b' only applies to constant buffer resources}}
- float f8 : register(b2);
+ float f9 : register(b2);
};
tbuffer g_tbuffer2 {
// expected-error@+1{{binding type 'i' ignored. The 'integer constant' binding type is no longer supported}}
- float f9 : register(i2);
+ float f10 : register(i2);
};
// expected-error@+1{{binding type 'c' only applies to numeric variables in the global scope}}
-RWBuffer<float> f10 : register(c3);
+RWBuffer<float> f11 : register(c3);
diff --git a/clang/test/SemaHLSL/resource_binding_attr_error_space.hlsl b/clang/test/SemaHLSL/resource_binding_attr_error_space.hlsl new file mode 100644 index 000000000000..70e64e6ca752 --- /dev/null +++ b/clang/test/SemaHLSL/resource_binding_attr_error_space.hlsl @@ -0,0 +1,62 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -o - -fsyntax-only %s -verify
+
+// valid
+cbuffer cbuf {
+ RWBuffer<int> r : register(u0, space0);
+}
+
+cbuffer cbuf2 {
+ struct x {
+ // this test validates that no diagnostic is emitted on the space parameter, because
+ // this register annotation is not in the global scope.
+ // expected-error@+1 {{'register' attribute only applies to cbuffer/tbuffer and external global variables}}
+ RWBuffer<int> E : register(u2, space3);
+ };
+}
+
+struct MyStruct {
+ RWBuffer<int> E;
+};
+
+cbuffer cbuf3 {
+ // valid
+ MyStruct E : register(u2, space3);
+}
+
+// valid
+MyStruct F : register(u3, space4);
+
+cbuffer cbuf4 {
+ // this test validates that no diagnostic is emitted on the space parameter, because
+ // this register annotation is not in the global scope.
+ // expected-error@+1 {{binding type 'u' only applies to UAV resources}}
+ float a : register(u2, space3);
+}
+
+// expected-error@+1 {{invalid space specifier 's2' used; expected 'space' followed by an integer, like space1}}
+cbuffer a : register(b0, s2) {
+
+}
+
+// expected-error@+1 {{invalid space specifier 'spaces' used; expected 'space' followed by an integer, like space1}}
+cbuffer b : register(b2, spaces) {
+
+}
+
+// expected-error@+1 {{wrong argument format for hlsl attribute, use space3 instead}}
+cbuffer c : register(b2, space 3) {}
+
+// expected-error@+1 {{register space cannot be specified on global constants}}
+int d : register(c2, space3);
+
+// expected-error@+1 {{register space cannot be specified on global constants}}
+int e : register(c2, space0);
+
+// expected-error@+1 {{register space cannot be specified on global constants}}
+int f : register(c2, space00);
+
+// valid
+RWBuffer<int> g : register(u2, space0);
+
+// valid
+RWBuffer<int> h : register(u2, space0);
diff --git a/clang/test/SemaTemplate/GH18291.cpp b/clang/test/SemaTemplate/GH18291.cpp index 820564ffa6f1..2e9754b65617 100644 --- a/clang/test/SemaTemplate/GH18291.cpp +++ b/clang/test/SemaTemplate/GH18291.cpp @@ -112,3 +112,12 @@ namespace static_vs_nonstatic { } } // namespace explicit_obj_param } // namespace static_vs_nonstatic + +namespace incomplete_on_sugar { + template <unsigned P, class T> void f(T[P]) = delete; + template <unsigned P> void f(int[][P]); + void test() { + int array[1][8]; + f<8>(array); + } +} // namespace incomplete_on_sugar diff --git a/clang/test/SemaTemplate/concepts-out-of-line-def.cpp b/clang/test/SemaTemplate/concepts-out-of-line-def.cpp index 333187b0d74a..5450d105a6f5 100644 --- a/clang/test/SemaTemplate/concepts-out-of-line-def.cpp +++ b/clang/test/SemaTemplate/concepts-out-of-line-def.cpp @@ -622,3 +622,47 @@ void A<T>::method(Ts&... ts) } {} } + +namespace GH63782 { +// GH63782 was also fixed by PR #80594, so let's add a test for it. + +template<bool... Vals> +constexpr bool All = (Vals && ...); + +template<bool... Bs> +class Class { + template<typename> + requires All<Bs...> + void Foo(); +}; + +template<bool... Bs> +template<typename> +requires All<Bs...> +void Class<Bs...>::Foo() { +}; + +} // namespace GH63782 + +namespace eve { +// Reduced from the "eve" project + +template <typename... Ts> +struct tuple { + template <int I0> requires(I0 <= sizeof...(Ts)) + constexpr auto split(); +}; + +template <typename... 
Ts> +template <int I0> +requires(I0 <= sizeof...(Ts)) +constexpr auto tuple<Ts...>::split(){ + return 0; +} + +int foo() { + tuple<int, float> x; + return x.split<0>(); +} + +} // namespace eve diff --git a/clang/test/clang-rename/ClassAsTemplateArgument.cpp b/clang/test/clang-rename/ClassAsTemplateArgument.cpp deleted file mode 100644 index 2e09a5b529e7..000000000000 --- a/clang/test/clang-rename/ClassAsTemplateArgument.cpp +++ /dev/null @@ -1,21 +0,0 @@ -class Foo /* Test 1 */ {}; // CHECK: class Bar /* Test 1 */ {}; - -template <typename T> -void func() {} - -template <typename T> -class Baz {}; - -int main() { - func<Foo>(); // CHECK: func<Bar>(); - Baz<Foo> /* Test 2 */ obj; // CHECK: Baz<Bar> /* Test 2 */ obj; - return 0; -} - -// Test 1. -// RUN: clang-rename -offset=7 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 2. -// RUN: clang-rename -offset=215 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s - -// To find offsets after modifying the file, use: -// grep -Ubo 'Foo.*' <file> diff --git a/clang/test/clang-rename/ClassFindByName.cpp b/clang/test/clang-rename/ClassFindByName.cpp deleted file mode 100644 index 4430891ec4b1..000000000000 --- a/clang/test/clang-rename/ClassFindByName.cpp +++ /dev/null @@ -1,10 +0,0 @@ -class Foo { // CHECK: class Bar { -}; - -int main() { - Foo *Pointer = 0; // CHECK: Bar *Pointer = 0; - return 0; -} - -// Test 1. -// RUN: clang-rename -qualified-name=Foo -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s diff --git a/clang/test/clang-rename/ClassSimpleRenaming.cpp b/clang/test/clang-rename/ClassSimpleRenaming.cpp deleted file mode 100644 index 086f55736cb7..000000000000 --- a/clang/test/clang-rename/ClassSimpleRenaming.cpp +++ /dev/null @@ -1,14 +0,0 @@ -class Foo /* Test 1 */ { // CHECK: class Bar /* Test 1 */ { -public: - void foo(int x); -}; - -void Foo::foo(int x) /* Test 2 */ {} // CHECK: void Bar::foo(int x) /* Test 2 */ {} - -// Test 1. 
-// RUN: clang-rename -offset=6 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 2. -// RUN: clang-rename -offset=109 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s - -// To find offsets after modifying the file, use: -// grep -Ubo 'Foo.*' <file> diff --git a/clang/test/clang-rename/ClassTestMulti.cpp b/clang/test/clang-rename/ClassTestMulti.cpp deleted file mode 100644 index 81e65c760652..000000000000 --- a/clang/test/clang-rename/ClassTestMulti.cpp +++ /dev/null @@ -1,11 +0,0 @@ -class Foo1 /* Offset 1 */ { // CHECK: class Bar1 /* Offset 1 */ { -}; - -class Foo2 /* Offset 2 */ { // CHECK: class Bar2 /* Offset 2 */ { -}; - -// Test 1. -// RUN: clang-rename -offset=6 -new-name=Bar1 -offset=76 -new-name=Bar2 %s -- | sed 's,//.*,,' | FileCheck %s - -// To find offsets after modifying the file, use: -// grep -Ubo 'Foo.*' <file> diff --git a/clang/test/clang-rename/ClassTestMultiByName.cpp b/clang/test/clang-rename/ClassTestMultiByName.cpp deleted file mode 100644 index 61b69a1bdf4c..000000000000 --- a/clang/test/clang-rename/ClassTestMultiByName.cpp +++ /dev/null @@ -1,8 +0,0 @@ -class Foo1 { // CHECK: class Bar1 -}; - -class Foo2 { // CHECK: class Bar2 -}; - -// Test 1. 
-// RUN: clang-rename -qualified-name=Foo1 -new-name=Bar1 -qualified-name=Foo2 -new-name=Bar2 %s -- | sed 's,//.*,,' | FileCheck %s diff --git a/clang/test/clang-rename/ComplexFunctionOverride.cpp b/clang/test/clang-rename/ComplexFunctionOverride.cpp deleted file mode 100644 index ccf3a20e5400..000000000000 --- a/clang/test/clang-rename/ComplexFunctionOverride.cpp +++ /dev/null @@ -1,47 +0,0 @@ -struct A { - virtual void foo() {} /* Test 1 */ // CHECK: virtual void bar() {} -}; - -struct B : A { - void foo() override {} /* Test 2 */ // CHECK: void bar() override {} -}; - -struct C : B { - void foo() override {} /* Test 3 */ // CHECK: void bar() override {} -}; - -struct D : B { - void foo() override {} /* Test 4 */ // CHECK: void bar() override {} -}; - -struct E : D { - void foo() override {} /* Test 5 */ // CHECK: void bar() override {} -}; - -int main() { - A a; - a.foo(); // CHECK: a.bar(); - B b; - b.foo(); // CHECK: b.bar(); - C c; - c.foo(); // CHECK: c.bar(); - D d; - d.foo(); // CHECK: d.bar(); - E e; - e.foo(); // CHECK: e.bar(); - return 0; -} - -// Test 1. -// RUN: clang-rename -offset=26 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 2. -// RUN: clang-rename -offset=109 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 3. -// RUN: clang-rename -offset=201 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 4. -// RUN: clang-rename -offset=293 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 5. -// RUN: clang-rename -offset=385 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s - -// To find offsets after modifying the file, use: -// grep -Ubo 'foo.*' <file> diff --git a/clang/test/clang-rename/ComplicatedClassType.cpp b/clang/test/clang-rename/ComplicatedClassType.cpp deleted file mode 100644 index 880195303127..000000000000 --- a/clang/test/clang-rename/ComplicatedClassType.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// Forward declaration. 
-class Foo; /* Test 1 */ // CHECK: class Bar; /* Test 1 */ - -class Baz { - virtual int getValue() const = 0; -}; - -class Foo : public Baz { /* Test 2 */// CHECK: class Bar : public Baz { -public: - Foo(int value = 0) : x(value) {} // CHECK: Bar(int value = 0) : x(value) {} - - Foo &operator++(int) { // CHECK: Bar &operator++(int) { - x++; - return *this; - } - - bool operator<(Foo const &rhs) { // CHECK: bool operator<(Bar const &rhs) { - return this->x < rhs.x; - } - - int getValue() const { - return 0; - } - -private: - int x; -}; - -int main() { - Foo *Pointer = 0; // CHECK: Bar *Pointer = 0; - Foo Variable = Foo(10); // CHECK: Bar Variable = Bar(10); - for (Foo it; it < Variable; it++) { // CHECK: for (Bar it; it < Variable; it++) { - } - const Foo *C = new Foo(); // CHECK: const Bar *C = new Bar(); - const_cast<Foo *>(C)->getValue(); // CHECK: const_cast<Bar *>(C)->getValue(); - Foo foo; // CHECK: Bar foo; - const Baz &BazReference = foo; - const Baz *BazPointer = &foo; - dynamic_cast<const Foo &>(BazReference).getValue(); /* Test 3 */ // CHECK: dynamic_cast<const Bar &>(BazReference).getValue(); - dynamic_cast<const Foo *>(BazPointer)->getValue(); /* Test 4 */ // CHECK: dynamic_cast<const Bar *>(BazPointer)->getValue(); - reinterpret_cast<const Foo *>(BazPointer)->getValue(); /* Test 5 */ // CHECK: reinterpret_cast<const Bar *>(BazPointer)->getValue(); - static_cast<const Foo &>(BazReference).getValue(); /* Test 6 */ // CHECK: static_cast<const Bar &>(BazReference).getValue(); - static_cast<const Foo *>(BazPointer)->getValue(); /* Test 7 */ // CHECK: static_cast<const Bar *>(BazPointer)->getValue(); - return 0; -} - -// Test 1. -// RUN: clang-rename -offset=30 -new-name=Bar %s -- -frtti | sed 's,//.*,,' | FileCheck %s -// Test 2. -// RUN: clang-rename -offset=155 -new-name=Bar %s -- -frtti | sed 's,//.*,,' | FileCheck %s -// Test 3. -// RUN: clang-rename -offset=1133 -new-name=Bar %s -- -frtti | sed 's,//.*,,' | FileCheck %s -// Test 4. 
-// RUN: clang-rename -offset=1266 -new-name=Bar %s -- -frtti | sed 's,//.*,,' | FileCheck %s -// Test 5. -// RUN: clang-rename -offset=1402 -new-name=Bar %s -- -frtti | sed 's,//.*,,' | FileCheck %s -// Test 6. -// RUN: clang-rename -offset=1533 -new-name=Bar %s -- -frtti | sed 's,//.*,,' | FileCheck %s -// Test 7. -// RUN: clang-rename -offset=1665 -new-name=Bar %s -- -frtti | sed 's,//.*,,' | FileCheck %s - -// To find offsets after modifying the file, use: -// grep -Ubo 'Foo.*' <file> diff --git a/clang/test/clang-rename/Ctor.cpp b/clang/test/clang-rename/Ctor.cpp deleted file mode 100644 index 9908a4123ddf..000000000000 --- a/clang/test/clang-rename/Ctor.cpp +++ /dev/null @@ -1,14 +0,0 @@ -class Foo { // CHECK: class Bar { -public: - Foo(); /* Test 1 */ // CHECK: Bar(); -}; - -Foo::Foo() /* Test 2 */ {} // CHECK: Bar::Bar() /* Test 2 */ {} - -// Test 1. -// RUN: clang-rename -offset=62 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 2. -// RUN: clang-rename -offset=116 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s - -// To find offsets after modifying the file, use: -// grep -Ubo 'Foo.*' <file> diff --git a/clang/test/clang-rename/CtorInitializer.cpp b/clang/test/clang-rename/CtorInitializer.cpp deleted file mode 100644 index fed4f5b06c27..000000000000 --- a/clang/test/clang-rename/CtorInitializer.cpp +++ /dev/null @@ -1,17 +0,0 @@ -class Baz {}; - -class Qux { - Baz Foo; /* Test 1 */ // CHECK: Baz Bar; -public: - Qux(); -}; - -Qux::Qux() : Foo() /* Test 2 */ {} // CHECK: Qux::Qux() : Bar() /* Test 2 */ {} - -// Test 1. -// RUN: clang-rename -offset=33 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 2. 
-// RUN: clang-rename -offset=118 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s - -// To find offsets after modifying the file, use: -// grep -Ubo 'Foo.*' <file> diff --git a/clang/test/clang-rename/DeclRefExpr.cpp b/clang/test/clang-rename/DeclRefExpr.cpp deleted file mode 100644 index 6462862d82ad..000000000000 --- a/clang/test/clang-rename/DeclRefExpr.cpp +++ /dev/null @@ -1,24 +0,0 @@ -class C { -public: - static int Foo; /* Test 1 */ // CHECK: static int Bar; -}; - -int foo(int x) { return 0; } -#define MACRO(a) foo(a) - -int main() { - C::Foo = 1; /* Test 2 */ // CHECK: C::Bar = 1; - MACRO(C::Foo); // CHECK: MACRO(C::Bar); - int y = C::Foo; /* Test 3 */ // CHECK: int y = C::Bar; - return 0; -} - -// Test 1. -// RUN: clang-rename -offset=31 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 2. -// RUN: clang-rename -offset=152 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 3. -// RUN: clang-rename -offset=271 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s - -// To find offsets after modifying the file, use: -// grep -Ubo 'Foo.*' <file> diff --git a/clang/test/clang-rename/ForceMulti.cpp b/clang/test/clang-rename/ForceMulti.cpp deleted file mode 100644 index 41983ce260c8..000000000000 --- a/clang/test/clang-rename/ForceMulti.cpp +++ /dev/null @@ -1,8 +0,0 @@ -class B /* Test 1 */ { // CHECK: class B2 /* Test 1 */ { -}; - -class D : public B /* Test 1 */ { // CHECK: class D : public B2 /* Test 1 */ { -}; - -// Test 1. 
-// RUN: clang-rename -force -qualified-name B -new-name B2 -qualified-name E -new-name E2 %s -- | sed 's,//.*,,' | FileCheck %s diff --git a/clang/test/clang-rename/ForwardClassDecl.cpp b/clang/test/clang-rename/ForwardClassDecl.cpp deleted file mode 100644 index ef731a16d6e0..000000000000 --- a/clang/test/clang-rename/ForwardClassDecl.cpp +++ /dev/null @@ -1,4 +0,0 @@ -class Foo; // CHECK: class Bar; -Foo *f(); // CHECK: Bar *f(); - -// RUN: clang-rename -offset=6 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s diff --git a/clang/test/clang-rename/FunctionMacro.cpp b/clang/test/clang-rename/FunctionMacro.cpp deleted file mode 100644 index 6e87026ec706..000000000000 --- a/clang/test/clang-rename/FunctionMacro.cpp +++ /dev/null @@ -1,20 +0,0 @@ -#define moo foo // CHECK: #define moo macro_function - -int foo() /* Test 1 */ { // CHECK: int macro_function() /* Test 1 */ { - return 42; -} - -void boo(int value) {} - -void qoo() { - foo(); // CHECK: macro_function(); - boo(foo()); // CHECK: boo(macro_function()); - moo(); - boo(moo()); -} - -// Test 1. -// RUN: clang-rename -offset=68 -new-name=macro_function %s -- | sed 's,//.*,,' | FileCheck %s - -// To find offsets after modifying the file, use: -// grep -Ubo 'foo.*' <file> diff --git a/clang/test/clang-rename/FunctionOverride.cpp b/clang/test/clang-rename/FunctionOverride.cpp deleted file mode 100644 index adfeb739e66d..000000000000 --- a/clang/test/clang-rename/FunctionOverride.cpp +++ /dev/null @@ -1,13 +0,0 @@ -class A { virtual void foo(); /* Test 1 */ }; // CHECK: class A { virtual void bar(); -class B : public A { void foo(); /* Test 2 */ }; // CHECK: class B : public A { void bar(); -class C : public B { void foo(); /* Test 3 */ }; // CHECK: class C : public B { void bar(); - -// Test 1. -// RUN: clang-rename -offset=23 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 2. -// RUN: clang-rename -offset=116 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 3. 
-// RUN: clang-rename -offset=209 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s - -// To find offsets after modifying the file, use: -// grep -Ubo 'foo.*' <file> diff --git a/clang/test/clang-rename/FunctionTemplate.cpp b/clang/test/clang-rename/FunctionTemplate.cpp deleted file mode 100644 index 51b2515b8894..000000000000 --- a/clang/test/clang-rename/FunctionTemplate.cpp +++ /dev/null @@ -1,19 +0,0 @@ -template <typename T> -void Foo(T t); // CHECK: void Bar(T t); - -template <> -void Foo(int a); // CHECK: void Bar(int a); - -void test() { - Foo<double>(1); // CHECK: Bar<double>(1); -} - -// Test 1. -// RUN: clang-rename -offset=28 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 2. -// RUN: clang-rename -offset=81 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 3. -// RUN: clang-rename -offset=137 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s - -// To find offsets after modifying the file, use: -// grep -Ubo 'Foo.*' <file> diff --git a/clang/test/clang-rename/FunctionWithClassFindByName.cpp b/clang/test/clang-rename/FunctionWithClassFindByName.cpp deleted file mode 100644 index 2cae09a1c244..000000000000 --- a/clang/test/clang-rename/FunctionWithClassFindByName.cpp +++ /dev/null @@ -1,12 +0,0 @@ -void foo() { -} - -class Foo { // CHECK: class Bar -}; - -int main() { - Foo *Pointer = 0; // CHECK: Bar *Pointer = 0; - return 0; -} - -// RUN: clang-rename -qualified-name=Foo -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s diff --git a/clang/test/clang-rename/IncludeHeaderWithSymbol.cpp b/clang/test/clang-rename/IncludeHeaderWithSymbol.cpp deleted file mode 100644 index cb2baee57b89..000000000000 --- a/clang/test/clang-rename/IncludeHeaderWithSymbol.cpp +++ /dev/null @@ -1,10 +0,0 @@ -#include "Inputs/HeaderWithSymbol.h" - -int main() { - return 0; // CHECK: {{^ return 0;}} -} - -// Test 1. 
-// The file IncludeHeaderWithSymbol.cpp doesn't contain the symbol Foo -// and is expected to be written to stdout without modifications -// RUN: clang-rename -qualified-name=Foo -new-name=Bar %s -- | FileCheck %s diff --git a/clang/test/clang-rename/Inputs/HeaderWithSymbol.h b/clang/test/clang-rename/Inputs/HeaderWithSymbol.h deleted file mode 100644 index 1fe02e89786c..000000000000 --- a/clang/test/clang-rename/Inputs/HeaderWithSymbol.h +++ /dev/null @@ -1 +0,0 @@ -struct Foo {}; diff --git a/clang/test/clang-rename/Inputs/OffsetToNewName.yaml b/clang/test/clang-rename/Inputs/OffsetToNewName.yaml deleted file mode 100644 index d8e972880f36..000000000000 --- a/clang/test/clang-rename/Inputs/OffsetToNewName.yaml +++ /dev/null @@ -1,6 +0,0 @@ ---- -- Offset: 6 - NewName: Bar1 -- Offset: 44 - NewName: Bar2 -... diff --git a/clang/test/clang-rename/Inputs/QualifiedNameToNewName.yaml b/clang/test/clang-rename/Inputs/QualifiedNameToNewName.yaml deleted file mode 100644 index 6e3783671dfa..000000000000 --- a/clang/test/clang-rename/Inputs/QualifiedNameToNewName.yaml +++ /dev/null @@ -1,6 +0,0 @@ ---- -- QualifiedName: Foo1 - NewName: Bar1 -- QualifiedName: Foo2 - NewName: Bar2 -... diff --git a/clang/test/clang-rename/InvalidNewName.cpp b/clang/test/clang-rename/InvalidNewName.cpp deleted file mode 100644 index e6b38e59420a..000000000000 --- a/clang/test/clang-rename/InvalidNewName.cpp +++ /dev/null @@ -1,2 +0,0 @@ -// RUN: not clang-rename -new-name=class -offset=133 %s 2>&1 | FileCheck %s -// CHECK: ERROR: new name is not a valid identifier in C++17. 
diff --git a/clang/test/clang-rename/InvalidOffset.cpp b/clang/test/clang-rename/InvalidOffset.cpp deleted file mode 100644 index 2ae04d01e4a7..000000000000 --- a/clang/test/clang-rename/InvalidOffset.cpp +++ /dev/null @@ -1,9 +0,0 @@ -#include "Inputs/HeaderWithSymbol.h" -#define FOO int bar; -FOO - -int foo; - -// RUN: not clang-rename -new-name=qux -offset=259 %s -- 2>&1 | FileCheck %s -// CHECK-NOT: CHECK -// CHECK: error: SourceLocation in file {{.*}}InvalidOffset.cpp at offset 259 is invalid diff --git a/clang/test/clang-rename/InvalidQualifiedName.cpp b/clang/test/clang-rename/InvalidQualifiedName.cpp deleted file mode 100644 index 5280e3939ccd..000000000000 --- a/clang/test/clang-rename/InvalidQualifiedName.cpp +++ /dev/null @@ -1,4 +0,0 @@ -struct S { -}; - -// RUN: clang-rename -force -qualified-name S2 -new-name=T %s -- diff --git a/clang/test/clang-rename/MemberExprMacro.cpp b/clang/test/clang-rename/MemberExprMacro.cpp deleted file mode 100644 index 56cd8d95f6e8..000000000000 --- a/clang/test/clang-rename/MemberExprMacro.cpp +++ /dev/null @@ -1,22 +0,0 @@ -class Baz { -public: - int Foo; /* Test 1 */ // CHECK: int Bar; -}; - -int qux(int x) { return 0; } -#define MACRO(a) qux(a) - -int main() { - Baz baz; - baz.Foo = 1; /* Test 2 */ // CHECK: baz.Bar = 1; - MACRO(baz.Foo); // CHECK: MACRO(baz.Bar); - int y = baz.Foo; // CHECK: int y = baz.Bar; -} - -// Test 1. -// RUN: clang-rename -offset=26 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 2. 
-// RUN: clang-rename -offset=155 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s - -// To find offsets after modifying the file, use: -// grep -Ubo 'Foo.*' <file> diff --git a/clang/test/clang-rename/Namespace.cpp b/clang/test/clang-rename/Namespace.cpp deleted file mode 100644 index ec9630fdedb6..000000000000 --- a/clang/test/clang-rename/Namespace.cpp +++ /dev/null @@ -1,13 +0,0 @@ -namespace gcc /* Test 1 */ { // CHECK: namespace clang /* Test 1 */ { - int x; -} - -void boo() { - gcc::x = 42; // CHECK: clang::x = 42; -} - -// Test 1. -// RUN: clang-rename -offset=10 -new-name=clang %s -- | sed 's,//.*,,' | FileCheck %s - -// To find offsets after modifying the file, use: -// grep -Ubo 'Foo.*' <file> diff --git a/clang/test/clang-rename/NoNewName.cpp b/clang/test/clang-rename/NoNewName.cpp deleted file mode 100644 index 4f882d83b0c1..000000000000 --- a/clang/test/clang-rename/NoNewName.cpp +++ /dev/null @@ -1,4 +0,0 @@ -// Check for an error while -new-name argument has not been passed to -// clang-rename. -// RUN: not clang-rename -offset=133 %s 2>&1 | FileCheck %s -// CHECK: clang-rename: -new-name must be specified. diff --git a/clang/test/clang-rename/NonExistFile.cpp b/clang/test/clang-rename/NonExistFile.cpp deleted file mode 100644 index f45839be8047..000000000000 --- a/clang/test/clang-rename/NonExistFile.cpp +++ /dev/null @@ -1,2 +0,0 @@ -// RUN: not clang-rename -offset=0 -new-name=bar non-existing-file 2>&1 | FileCheck %s -// CHECK: clang-rename: non-existing-file does not exist. 
diff --git a/clang/test/clang-rename/TemplateClassInstantiation.cpp b/clang/test/clang-rename/TemplateClassInstantiation.cpp deleted file mode 100644 index 493d0951df57..000000000000 --- a/clang/test/clang-rename/TemplateClassInstantiation.cpp +++ /dev/null @@ -1,42 +0,0 @@ -template <typename T> -class Foo { /* Test 1 */ // CHECK: class Bar { /* Test 1 */ -public: - T foo(T arg, T& ref, T* ptr) { - T value; - int number = 42; - value = (T)number; - value = static_cast<T>(number); - return value; - } - static void foo(T value) {} - T member; -}; - -template <typename T> -void func() { - Foo<T> obj; /* Test 2 */ // CHECK: Bar<T> obj; - obj.member = T(); - Foo<T>::foo(); // CHECK: Bar<T>::foo(); -} - -int main() { - Foo<int> i; /* Test 3 */ // CHECK: Bar<int> i; - i.member = 0; - Foo<int>::foo(0); // CHECK: Bar<int>::foo(0); - - Foo<bool> b; // CHECK: Bar<bool> b; - b.member = false; - Foo<bool>::foo(false); // CHECK: Bar<bool>::foo(false); - - return 0; -} - -// Test 1. -// RUN: clang-rename -offset=29 -new-name=Bar %s -- -fno-delayed-template-parsing | sed 's,//.*,,' | FileCheck %s -// Test 2. -// RUN: clang-rename -offset=324 -new-name=Bar %s -- -fno-delayed-template-parsing | sed 's,//.*,,' | FileCheck %s -// Test 3. 
-// RUN: clang-rename -offset=463 -new-name=Bar %s -- -fno-delayed-template-parsing | sed 's,//.*,,' | FileCheck %s - -// To find offsets after modifying the file, use: -// grep -Ubo 'Foo.*' <file> diff --git a/clang/test/clang-rename/TemplateCtor.cpp b/clang/test/clang-rename/TemplateCtor.cpp deleted file mode 100644 index 9a59194ac3f4..000000000000 --- a/clang/test/clang-rename/TemplateCtor.cpp +++ /dev/null @@ -1,10 +0,0 @@ -class Foo { // CHECK: class Bar { -public: - template <typename T> - Foo(); // CHECK: Bar(); - - template <typename T> - Foo(Foo &); // CHECK: Bar(Bar &); -}; - -// RUN: clang-rename -offset=6 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s diff --git a/clang/test/clang-rename/TemplateTypename.cpp b/clang/test/clang-rename/TemplateTypename.cpp deleted file mode 100644 index 559ec1f9ade7..000000000000 --- a/clang/test/clang-rename/TemplateTypename.cpp +++ /dev/null @@ -1,24 +0,0 @@ -template <typename T /* Test 1 */> // CHECK: template <typename U /* Test 1 */> -class Foo { -T foo(T arg, T& ref, T* /* Test 2 */ ptr) { // CHECK: U foo(U arg, U& ref, U* /* Test 2 */ ptr) { - T value; // CHECK: U value; - int number = 42; - value = (T)number; // CHECK: value = (U)number; - value = static_cast<T /* Test 3 */>(number); // CHECK: value = static_cast<U /* Test 3 */>(number); - return value; -} - -static void foo(T value) {} // CHECK: static void foo(U value) {} - -T member; // CHECK: U member; -}; - -// Test 1. -// RUN: clang-rename -offset=19 -new-name=U %s -- -fno-delayed-template-parsing | sed 's,//.*,,' | FileCheck %s -// Test 2. -// RUN: clang-rename -offset=126 -new-name=U %s -- -fno-delayed-template-parsing | sed 's,//.*,,' | FileCheck %s -// Test 3. 
-// RUN: clang-rename -offset=392 -new-name=U %s -- -fno-delayed-template-parsing | sed 's,//.*,,' | FileCheck %s - -// To find offsets after modifying the file, use: -// grep -Ubo 'T.*' <file> diff --git a/clang/test/clang-rename/TemplatedClassFunction.cpp b/clang/test/clang-rename/TemplatedClassFunction.cpp deleted file mode 100644 index d7f21e0847c9..000000000000 --- a/clang/test/clang-rename/TemplatedClassFunction.cpp +++ /dev/null @@ -1,27 +0,0 @@ -template <typename T> -class A { -public: - void foo() /* Test 1 */ {} // CHECK: void bar() /* Test 1 */ {} -}; - -int main(int argc, char **argv) { - A<int> a; - A<double> b; - A<float> c; - a.foo(); /* Test 2 */ // CHECK: a.bar(); /* Test 2 */ - b.foo(); /* Test 3 */ // CHECK: b.bar(); /* Test 3 */ - c.foo(); /* Test 4 */ // CHECK: c.bar(); /* Test 4 */ - return 0; -} - -// Test 1. -// RUN: clang-rename -offset=48 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 2. -// RUN: clang-rename -offset=191 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 3. -// RUN: clang-rename -offset=255 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 4. 
-// RUN: clang-rename -offset=319 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s - -// To find offsets after modifying the file, use: -// grep -Ubo 'foo.*' <file> diff --git a/clang/test/clang-rename/Typedef.cpp b/clang/test/clang-rename/Typedef.cpp deleted file mode 100644 index 64d337fae22c..000000000000 --- a/clang/test/clang-rename/Typedef.cpp +++ /dev/null @@ -1,8 +0,0 @@ -namespace std { -class basic_string {}; -typedef basic_string string; -} // namespace std - -std::string foo(); // // CHECK: std::new_string foo(); - -// RUN: clang-rename -offset=93 -new-name=new_string %s -- | sed 's,//.*,,' | FileCheck %s diff --git a/clang/test/clang-rename/UserDefinedConversion.cpp b/clang/test/clang-rename/UserDefinedConversion.cpp deleted file mode 100644 index 60f251ab4483..000000000000 --- a/clang/test/clang-rename/UserDefinedConversion.cpp +++ /dev/null @@ -1,26 +0,0 @@ -class Foo { /* Test 1 */ // CHECK: class Bar { -public: - Foo() {} // CHECK: Bar() {} -}; - -class Baz { -public: - operator Foo() /* Test 2 */ const { // CHECK: operator Bar() /* Test 2 */ const { - Foo foo; // CHECK: Bar foo; - return foo; - } -}; - -int main() { - Baz boo; - Foo foo = static_cast<Foo>(boo); // CHECK: Bar foo = static_cast<Bar>(boo); - return 0; -} - -// Test 1. -// RUN: clang-rename -offset=7 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 2. 
-// RUN: clang-rename -offset=164 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s - -// To find offsets after modifying the file, use: -// grep -Ubo 'Foo.*' <file> diff --git a/clang/test/clang-rename/Variable.cpp b/clang/test/clang-rename/Variable.cpp deleted file mode 100644 index d7e670fb43ee..000000000000 --- a/clang/test/clang-rename/Variable.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#define NAMESPACE namespace A -NAMESPACE { -int Foo; /* Test 1 */ // CHECK: int Bar; -} -int Foo; // CHECK: int Foo; -int Qux = Foo; // CHECK: int Qux = Foo; -int Baz = A::Foo; /* Test 2 */ // CHECK: Baz = A::Bar; -void fun() { - struct { - int Foo; // CHECK: int Foo; - } b = {100}; - int Foo = 100; // CHECK: int Foo = 100; - Baz = Foo; // CHECK: Baz = Foo; - { - extern int Foo; // CHECK: extern int Foo; - Baz = Foo; // CHECK: Baz = Foo; - Foo = A::Foo /* Test 3 */ + Baz; // CHECK: Foo = A::Bar /* Test 3 */ + Baz; - A::Foo /* Test 4 */ = b.Foo; // CHECK: A::Bar /* Test 4 */ = b.Foo; - } - Foo = b.Foo; // Foo = b.Foo; -} - -// Test 1. -// RUN: clang-rename -offset=46 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 2. -// RUN: clang-rename -offset=234 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 3. -// RUN: clang-rename -offset=641 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 4. -// RUN: clang-rename -offset=716 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s - -// To find offsets after modifying the file, use: -// grep -Ubo 'Foo.*' <file> diff --git a/clang/test/clang-rename/VariableMacro.cpp b/clang/test/clang-rename/VariableMacro.cpp deleted file mode 100644 index 622e825d3e41..000000000000 --- a/clang/test/clang-rename/VariableMacro.cpp +++ /dev/null @@ -1,21 +0,0 @@ -#define Baz Foo // CHECK: #define Baz Bar - -void foo(int value) {} - -void macro() { - int Foo; /* Test 1 */ // CHECK: int Bar; - Foo = 42; /* Test 2 */ // CHECK: Bar = 42; - Baz -= 0; - foo(Foo); /* Test 3 */ // CHECK: foo(Bar); - foo(Baz); -} - -// Test 1. 
-// RUN: clang-rename -offset=88 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 2. -// RUN: clang-rename -offset=129 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 3. -// RUN: clang-rename -offset=191 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s - -// To find offsets after modifying the file, use: -// grep -Ubo 'Foo.*' <file> diff --git a/clang/test/clang-rename/VariableTemplate.cpp b/clang/test/clang-rename/VariableTemplate.cpp deleted file mode 100644 index a345ede5a7f6..000000000000 --- a/clang/test/clang-rename/VariableTemplate.cpp +++ /dev/null @@ -1,32 +0,0 @@ -template <typename T, int U> -bool Foo = true; // CHECK: bool Bar = true; - -// explicit template specialization -template <> -bool Foo<int, 0> = false; // CHECK: bool Bar<int, 0> = false; - -// partial template specialization -template <typename T> -bool Foo<T, 1> = false; // bool Bar<x, 1> = false; - -void k() { - // ref to the explicit template specialization - Foo<int, 0>; // CHECK: Bar<int, 0>; - // ref to the primary template. - Foo<double, 2>; // CHECK: Bar<double, 2>; -} - - -// Test 1. -// RUN: clang-rename -offset=34 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 2. -// RUN: clang-rename -offset=128 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 3. -// RUN: clang-rename -offset=248 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 4. -// RUN: clang-rename -offset=357 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s -// Test 5. -// RUN: clang-rename -offset=431 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s - -// To find offsets after modifying the file, use: -// grep -Ubo 'Foo.*' <file> diff --git a/clang/test/clang-rename/YAMLInput.cpp b/clang/test/clang-rename/YAMLInput.cpp deleted file mode 100644 index 55dbc6d66a5a..000000000000 --- a/clang/test/clang-rename/YAMLInput.cpp +++ /dev/null @@ -1,10 +0,0 @@ -class Foo1 { // CHECK: class Bar1 -}; - -class Foo2 { // CHECK: class Bar2 -}; - -// Test 1. 
-// RUN: clang-rename -input %S/Inputs/OffsetToNewName.yaml %s -- | sed 's,//.*,,' | FileCheck %s -// Test 2. -// RUN: clang-rename -input %S/Inputs/QualifiedNameToNewName.yaml %s -- | sed 's,//.*,,' | FileCheck %s diff --git a/clang/test/utils/update-verify-tests/Inputs/duplicate-diag.c b/clang/test/utils/update-verify-tests/Inputs/duplicate-diag.c deleted file mode 100644 index 8c7e46c6eca9..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/duplicate-diag.c +++ /dev/null @@ -1,8 +0,0 @@ -void foo() { - // expected-error@+1{{use of undeclared identifier 'a'}} - a = 2; a = 2; - b = 2; b = 2; - // expected-error@+1 3{{use of undeclared identifier 'c'}} - c = 2; c = 2; - // expected-error 2{{asdf}} -} diff --git a/clang/test/utils/update-verify-tests/Inputs/duplicate-diag.c.expected b/clang/test/utils/update-verify-tests/Inputs/duplicate-diag.c.expected deleted file mode 100644 index 6214ff382f44..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/duplicate-diag.c.expected +++ /dev/null @@ -1,8 +0,0 @@ -void foo() { - // expected-error@+1 2{{use of undeclared identifier 'a'}} - a = 2; a = 2; - // expected-error@+1 2{{use of undeclared identifier 'b'}} - b = 2; b = 2; - // expected-error@+1 2{{use of undeclared identifier 'c'}} - c = 2; c = 2; -} diff --git a/clang/test/utils/update-verify-tests/Inputs/infer-indentation.c b/clang/test/utils/update-verify-tests/Inputs/infer-indentation.c deleted file mode 100644 index 0210ac35fd5c..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/infer-indentation.c +++ /dev/null @@ -1,8 +0,0 @@ -void foo() { - // expected-error@+1 2 {{use of undeclared identifier 'a'}} - a = 2; a = 2; b = 2; b = 2; c = 2; - // expected-error@+1 2 {{asdf}} - d = 2; - e = 2; f = 2; // expected-error 2 {{use of undeclared identifier 'e'}} -} - diff --git a/clang/test/utils/update-verify-tests/Inputs/infer-indentation.c.expected b/clang/test/utils/update-verify-tests/Inputs/infer-indentation.c.expected deleted file 
mode 100644 index 5c5aaeeef97a..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/infer-indentation.c.expected +++ /dev/null @@ -1,11 +0,0 @@ -void foo() { - // expected-error@+3 {{use of undeclared identifier 'c'}} - // expected-error@+2 2 {{use of undeclared identifier 'b'}} - // expected-error@+1 2 {{use of undeclared identifier 'a'}} - a = 2; a = 2; b = 2; b = 2; c = 2; - // expected-error@+1 {{use of undeclared identifier 'd'}} - d = 2; - // expected-error@+1 {{use of undeclared identifier 'f'}} - e = 2; f = 2; // expected-error {{use of undeclared identifier 'e'}} -} - diff --git a/clang/test/utils/update-verify-tests/Inputs/leave-existing-diags.c b/clang/test/utils/update-verify-tests/Inputs/leave-existing-diags.c deleted file mode 100644 index 1aa8d088e972..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/leave-existing-diags.c +++ /dev/null @@ -1,11 +0,0 @@ -void foo() { - a = 2; - // expected-error@-1{{use of undeclared identifier 'a'}} - b = 2;// expected-error{{use of undeclared identifier 'b'}} - c = 2; - // expected-error@5{{use of undeclared identifier 'c'}} - d = 2; // expected-error-re{{use of {{.*}} identifier 'd'}} - - e = 2; // error to trigger mismatch -} - diff --git a/clang/test/utils/update-verify-tests/Inputs/leave-existing-diags.c.expected b/clang/test/utils/update-verify-tests/Inputs/leave-existing-diags.c.expected deleted file mode 100644 index 6b621061bbfb..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/leave-existing-diags.c.expected +++ /dev/null @@ -1,12 +0,0 @@ -void foo() { - a = 2; - // expected-error@-1{{use of undeclared identifier 'a'}} - b = 2;// expected-error{{use of undeclared identifier 'b'}} - c = 2; - // expected-error@5{{use of undeclared identifier 'c'}} - d = 2; // expected-error-re{{use of {{.*}} identifier 'd'}} - - // expected-error@+1{{use of undeclared identifier 'e'}} - e = 2; // error to trigger mismatch -} - diff --git 
a/clang/test/utils/update-verify-tests/Inputs/multiple-errors.c b/clang/test/utils/update-verify-tests/Inputs/multiple-errors.c deleted file mode 100644 index e230e0a337bf..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/multiple-errors.c +++ /dev/null @@ -1,6 +0,0 @@ -void foo() { - a = 2; - b = 2; - - c = 2; -} diff --git a/clang/test/utils/update-verify-tests/Inputs/multiple-errors.c.expected b/clang/test/utils/update-verify-tests/Inputs/multiple-errors.c.expected deleted file mode 100644 index 27dc1f30a26f..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/multiple-errors.c.expected +++ /dev/null @@ -1,9 +0,0 @@ -void foo() { - // expected-error@+1{{use of undeclared identifier 'a'}} - a = 2; - // expected-error@+1{{use of undeclared identifier 'b'}} - b = 2; - - // expected-error@+1{{use of undeclared identifier 'c'}} - c = 2; -} diff --git a/clang/test/utils/update-verify-tests/Inputs/multiple-missing-errors-same-line.c b/clang/test/utils/update-verify-tests/Inputs/multiple-missing-errors-same-line.c deleted file mode 100644 index 03f723d44bbe..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/multiple-missing-errors-same-line.c +++ /dev/null @@ -1,8 +0,0 @@ -void foo() { - a = 2; b = 2; c = 2; -} - -void bar() { - x = 2; y = 2; z = 2; - // expected-error@-1{{use of undeclared identifier 'x'}} -} diff --git a/clang/test/utils/update-verify-tests/Inputs/multiple-missing-errors-same-line.c.expected b/clang/test/utils/update-verify-tests/Inputs/multiple-missing-errors-same-line.c.expected deleted file mode 100644 index 24b57f4353d9..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/multiple-missing-errors-same-line.c.expected +++ /dev/null @@ -1,13 +0,0 @@ -void foo() { - // expected-error@+3{{use of undeclared identifier 'c'}} - // expected-error@+2{{use of undeclared identifier 'b'}} - // expected-error@+1{{use of undeclared identifier 'a'}} - a = 2; b = 2; c = 2; -} - -void bar() { - x = 2; y = 2; z = 2; - 
// expected-error@-1{{use of undeclared identifier 'x'}} - // expected-error@-2{{use of undeclared identifier 'y'}} - // expected-error@-3{{use of undeclared identifier 'z'}} -} diff --git a/clang/test/utils/update-verify-tests/Inputs/no-checks.c b/clang/test/utils/update-verify-tests/Inputs/no-checks.c deleted file mode 100644 index 8fd1f7cd3337..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/no-checks.c +++ /dev/null @@ -1,3 +0,0 @@ -void foo() { - bar = 2; -} diff --git a/clang/test/utils/update-verify-tests/Inputs/no-checks.c.expected b/clang/test/utils/update-verify-tests/Inputs/no-checks.c.expected deleted file mode 100644 index e80548fbe50f..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/no-checks.c.expected +++ /dev/null @@ -1,4 +0,0 @@ -void foo() { - // expected-error@+1{{use of undeclared identifier 'bar'}} - bar = 2; -} diff --git a/clang/test/utils/update-verify-tests/Inputs/no-diags.c b/clang/test/utils/update-verify-tests/Inputs/no-diags.c deleted file mode 100644 index 66d169be4394..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/no-diags.c +++ /dev/null @@ -1,5 +0,0 @@ -void foo() { - // expected-error@+1{{asdf}} - int a = 2; -} - diff --git a/clang/test/utils/update-verify-tests/Inputs/no-diags.c.expected b/clang/test/utils/update-verify-tests/Inputs/no-diags.c.expected deleted file mode 100644 index 052302849457..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/no-diags.c.expected +++ /dev/null @@ -1,5 +0,0 @@ -// expected-no-diagnostics -void foo() { - int a = 2; -} - diff --git a/clang/test/utils/update-verify-tests/Inputs/no-expected-diags.c b/clang/test/utils/update-verify-tests/Inputs/no-expected-diags.c deleted file mode 100644 index 78b72e1357da..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/no-expected-diags.c +++ /dev/null @@ -1,4 +0,0 @@ -// expected-no-diagnostics -void foo() { - a = 2; -} diff --git 
a/clang/test/utils/update-verify-tests/Inputs/no-expected-diags.c.expected b/clang/test/utils/update-verify-tests/Inputs/no-expected-diags.c.expected deleted file mode 100644 index d948ffce5618..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/no-expected-diags.c.expected +++ /dev/null @@ -1,4 +0,0 @@ -void foo() { - // expected-error@+1{{use of undeclared identifier 'a'}} - a = 2; -} diff --git a/clang/test/utils/update-verify-tests/Inputs/non-default-prefix.c b/clang/test/utils/update-verify-tests/Inputs/non-default-prefix.c deleted file mode 100644 index 3d63eaf0f1b8..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/non-default-prefix.c +++ /dev/null @@ -1,5 +0,0 @@ -void foo() { - a = 2; // check-error{{asdf}} - // expected-error@-1{ignored}} -} - diff --git a/clang/test/utils/update-verify-tests/Inputs/non-default-prefix.c.expected b/clang/test/utils/update-verify-tests/Inputs/non-default-prefix.c.expected deleted file mode 100644 index a877f8692212..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/non-default-prefix.c.expected +++ /dev/null @@ -1,5 +0,0 @@ -void foo() { - a = 2; // check-error{{use of undeclared identifier 'a'}} - // expected-error@-1{ignored}} -} - diff --git a/clang/test/utils/update-verify-tests/Inputs/update-same-line.c b/clang/test/utils/update-verify-tests/Inputs/update-same-line.c deleted file mode 100644 index 5278ce0c57c3..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/update-same-line.c +++ /dev/null @@ -1,4 +0,0 @@ -void foo() { - bar = 2; // expected-error {{asdf}} -} - diff --git a/clang/test/utils/update-verify-tests/Inputs/update-same-line.c.expected b/clang/test/utils/update-verify-tests/Inputs/update-same-line.c.expected deleted file mode 100644 index 8ba47f788319..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/update-same-line.c.expected +++ /dev/null @@ -1,4 +0,0 @@ -void foo() { - bar = 2; // expected-error {{use of undeclared identifier 'bar'}} -} - 
diff --git a/clang/test/utils/update-verify-tests/Inputs/update-single-check.c b/clang/test/utils/update-verify-tests/Inputs/update-single-check.c deleted file mode 100644 index 20b011bfc3d7..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/update-single-check.c +++ /dev/null @@ -1,4 +0,0 @@ -void foo() { - // expected-error@+1{{asdf}} - bar = 2; -} diff --git a/clang/test/utils/update-verify-tests/Inputs/update-single-check.c.expected b/clang/test/utils/update-verify-tests/Inputs/update-single-check.c.expected deleted file mode 100644 index e80548fbe50f..000000000000 --- a/clang/test/utils/update-verify-tests/Inputs/update-single-check.c.expected +++ /dev/null @@ -1,4 +0,0 @@ -void foo() { - // expected-error@+1{{use of undeclared identifier 'bar'}} - bar = 2; -} diff --git a/clang/test/utils/update-verify-tests/duplicate-diag.test b/clang/test/utils/update-verify-tests/duplicate-diag.test deleted file mode 100644 index db4b0fd86f08..000000000000 --- a/clang/test/utils/update-verify-tests/duplicate-diag.test +++ /dev/null @@ -1,4 +0,0 @@ -# RUN: cp %S/Inputs/duplicate-diag.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests -# RUN: diff --strip-trailing-cr %S/Inputs/duplicate-diag.c.expected %t.c -# RUN: %clang_cc1 -verify %t.c - diff --git a/clang/test/utils/update-verify-tests/infer-indentation.test b/clang/test/utils/update-verify-tests/infer-indentation.test deleted file mode 100644 index bd94dce4844e..000000000000 --- a/clang/test/utils/update-verify-tests/infer-indentation.test +++ /dev/null @@ -1,3 +0,0 @@ -# RUN: cp %S/Inputs/infer-indentation.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests -# RUN: diff --strip-trailing-cr %S/Inputs/infer-indentation.c.expected %t.c -# RUN: %clang_cc1 -verify %t.c diff --git a/clang/test/utils/update-verify-tests/leave-existing-diags.test b/clang/test/utils/update-verify-tests/leave-existing-diags.test deleted file mode 100644 index 8a723f157bf8..000000000000 --- 
a/clang/test/utils/update-verify-tests/leave-existing-diags.test +++ /dev/null @@ -1,4 +0,0 @@ -# RUN: cp %S/Inputs/leave-existing-diags.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests -# RUN: diff --strip-trailing-cr %S/Inputs/leave-existing-diags.c.expected %t.c -# RUN: %clang_cc1 -verify %t.c - diff --git a/clang/test/utils/update-verify-tests/lit.local.cfg b/clang/test/utils/update-verify-tests/lit.local.cfg deleted file mode 100644 index b0eebf337da5..000000000000 --- a/clang/test/utils/update-verify-tests/lit.local.cfg +++ /dev/null @@ -1,28 +0,0 @@ -import lit.util - -# python 2.7 backwards compatibility -try: - from shlex import quote as shell_quote -except ImportError: - from pipes import quote as shell_quote - -if config.standalone_build: - # These tests require the update-verify-tests.py script from the clang - # source tree, so skip these tests if we are doing standalone builds. - config.unsupported = True -else: - config.suffixes = [".test"] - - script_path = os.path.join( - config.clang_src_dir, "utils", "update-verify-tests.py" - ) - python = shell_quote(config.python_executable) - config.substitutions.append( - ( - "%update-verify-tests", - "%s %s" % (python, shell_quote(script_path)), - ) - ) - # AIX 'diff' command doesn't support --strip-trailing-cr, but the internal - # python implementation does, so use that for cross platform compatibility - config.test_format = lit.formats.ShTest() diff --git a/clang/test/utils/update-verify-tests/multiple-errors.test b/clang/test/utils/update-verify-tests/multiple-errors.test deleted file mode 100644 index 1fcb6b7f2ca0..000000000000 --- a/clang/test/utils/update-verify-tests/multiple-errors.test +++ /dev/null @@ -1,3 +0,0 @@ -# RUN: cp %S/Inputs/multiple-errors.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests -# RUN: diff --strip-trailing-cr %S/Inputs/multiple-errors.c.expected %t.c -# RUN: %clang_cc1 -verify %t.c diff --git 
a/clang/test/utils/update-verify-tests/multiple-missing-errors-same-line.test b/clang/test/utils/update-verify-tests/multiple-missing-errors-same-line.test deleted file mode 100644 index 00338d7595cb..000000000000 --- a/clang/test/utils/update-verify-tests/multiple-missing-errors-same-line.test +++ /dev/null @@ -1,3 +0,0 @@ -# RUN: cp %S/Inputs/multiple-missing-errors-same-line.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests -# RUN: diff --strip-trailing-cr %S/Inputs/multiple-missing-errors-same-line.c.expected %t.c -# RUN: %clang_cc1 -verify %t.c diff --git a/clang/test/utils/update-verify-tests/no-checks.test b/clang/test/utils/update-verify-tests/no-checks.test deleted file mode 100644 index 5fdbdcbac952..000000000000 --- a/clang/test/utils/update-verify-tests/no-checks.test +++ /dev/null @@ -1,3 +0,0 @@ -# RUN: cp %S/Inputs/no-checks.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests -# RUN: diff --strip-trailing-cr %S/Inputs/no-checks.c.expected %t.c -# RUN: %clang_cc1 -verify %t.c diff --git a/clang/test/utils/update-verify-tests/no-diags.test b/clang/test/utils/update-verify-tests/no-diags.test deleted file mode 100644 index 825fd0219deb..000000000000 --- a/clang/test/utils/update-verify-tests/no-diags.test +++ /dev/null @@ -1,4 +0,0 @@ -# RUN: cp %S/Inputs/no-diags.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests -# RUN: diff --strip-trailing-cr %S/Inputs/no-diags.c.expected %t.c -# RUN: %clang_cc1 -verify %t.c - diff --git a/clang/test/utils/update-verify-tests/no-expected-diags.test b/clang/test/utils/update-verify-tests/no-expected-diags.test deleted file mode 100644 index be475c190da1..000000000000 --- a/clang/test/utils/update-verify-tests/no-expected-diags.test +++ /dev/null @@ -1,4 +0,0 @@ -# RUN: cp %S/Inputs/no-expected-diags.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests -# RUN: diff --strip-trailing-cr %S/Inputs/no-expected-diags.c.expected %t.c -# RUN: %clang_cc1 -verify %t.c - 
diff --git a/clang/test/utils/update-verify-tests/non-default-prefix.test b/clang/test/utils/update-verify-tests/non-default-prefix.test deleted file mode 100644 index 594dba4174d2..000000000000 --- a/clang/test/utils/update-verify-tests/non-default-prefix.test +++ /dev/null @@ -1,4 +0,0 @@ -# RUN: cp %S/Inputs/non-default-prefix.c %t.c && not %clang_cc1 -verify=check %t.c 2>&1 | %update-verify-tests --prefix check -# RUN: diff --strip-trailing-cr %S/Inputs/non-default-prefix.c.expected %t.c -# RUN: %clang_cc1 -verify=check %t.c - diff --git a/clang/test/utils/update-verify-tests/update-same-line.test b/clang/test/utils/update-verify-tests/update-same-line.test deleted file mode 100644 index b7e5d7a574ec..000000000000 --- a/clang/test/utils/update-verify-tests/update-same-line.test +++ /dev/null @@ -1,4 +0,0 @@ -# RUN: cp %S/Inputs/update-same-line.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests -# RUN: diff --strip-trailing-cr %S/Inputs/update-same-line.c.expected %t.c -# RUN: %clang_cc1 -verify %t.c - diff --git a/clang/test/utils/update-verify-tests/update-single-check.test b/clang/test/utils/update-verify-tests/update-single-check.test deleted file mode 100644 index b958d66b099d..000000000000 --- a/clang/test/utils/update-verify-tests/update-single-check.test +++ /dev/null @@ -1,3 +0,0 @@ -# RUN: cp %S/Inputs/update-single-check.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests -# RUN: diff --strip-trailing-cr %S/Inputs/update-single-check.c.expected %t.c -# RUN: %clang_cc1 -verify %t.c diff --git a/clang/tools/CMakeLists.txt b/clang/tools/CMakeLists.txt index f588a3634ee6..9a3512712a28 100644 --- a/clang/tools/CMakeLists.txt +++ b/clang/tools/CMakeLists.txt @@ -19,7 +19,6 @@ endif() add_clang_subdirectory(c-index-test) -add_clang_subdirectory(clang-rename) add_clang_subdirectory(clang-refactor) # For MinGW we only enable shared library if LLVM_LINK_LLVM_DYLIB=ON. 
# Without that option resulting library is too close to 2^16 DLL exports limit. diff --git a/clang/tools/clang-rename/CMakeLists.txt b/clang/tools/clang-rename/CMakeLists.txt deleted file mode 100644 index f4c4e520520d..000000000000 --- a/clang/tools/clang-rename/CMakeLists.txt +++ /dev/null @@ -1,26 +0,0 @@ -set(LLVM_LINK_COMPONENTS - Option - Support - ) - -add_clang_tool(clang-rename - ClangRename.cpp - ) - -clang_target_link_libraries(clang-rename - PRIVATE - clangBasic - clangFrontend - clangRewrite - clangSerialization - clangTooling - clangToolingCore - clangToolingRefactoring - ) - -install(FILES clang-rename.py - DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" - COMPONENT clang-rename) -install(FILES clang-rename.el - DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" - COMPONENT clang-rename) diff --git a/clang/tools/clang-rename/ClangRename.cpp b/clang/tools/clang-rename/ClangRename.cpp deleted file mode 100644 index f2ac0c4360e0..000000000000 --- a/clang/tools/clang-rename/ClangRename.cpp +++ /dev/null @@ -1,242 +0,0 @@ -//===--- tools/extra/clang-rename/ClangRename.cpp - Clang rename tool -----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements a clang-rename tool that automatically finds and -/// renames symbols in C++ code. 
-/// -//===----------------------------------------------------------------------===// - -#include "clang/Basic/Diagnostic.h" -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Basic/FileManager.h" -#include "clang/Basic/IdentifierTable.h" -#include "clang/Basic/LangOptions.h" -#include "clang/Basic/SourceManager.h" -#include "clang/Basic/TokenKinds.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Rewrite/Core/Rewriter.h" -#include "clang/Tooling/CommonOptionsParser.h" -#include "clang/Tooling/Refactoring.h" -#include "clang/Tooling/Refactoring/Rename/RenamingAction.h" -#include "clang/Tooling/Refactoring/Rename/USRFindingAction.h" -#include "clang/Tooling/ReplacementsYaml.h" -#include "clang/Tooling/Tooling.h" -#include "llvm/ADT/IntrusiveRefCntPtr.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/YAMLTraits.h" -#include "llvm/Support/raw_ostream.h" -#include <string> -#include <system_error> - -using namespace llvm; -using namespace clang; - -/// An oldname -> newname rename. -struct RenameAllInfo { - unsigned Offset = 0; - std::string QualifiedName; - std::string NewName; -}; - -LLVM_YAML_IS_SEQUENCE_VECTOR(RenameAllInfo) - -namespace llvm { -namespace yaml { - -/// Specialized MappingTraits to describe how a RenameAllInfo is -/// (de)serialized. 
-template <> struct MappingTraits<RenameAllInfo> { - static void mapping(IO &IO, RenameAllInfo &Info) { - IO.mapOptional("Offset", Info.Offset); - IO.mapOptional("QualifiedName", Info.QualifiedName); - IO.mapRequired("NewName", Info.NewName); - } -}; - -} // end namespace yaml -} // end namespace llvm - -static cl::OptionCategory ClangRenameOptions("clang-rename common options"); - -static cl::list<unsigned> SymbolOffsets( - "offset", - cl::desc("Locates the symbol by offset as opposed to <line>:<column>."), - cl::cat(ClangRenameOptions)); -static cl::opt<bool> Inplace("i", cl::desc("Overwrite edited <file>s."), - cl::cat(ClangRenameOptions)); -static cl::list<std::string> - QualifiedNames("qualified-name", - cl::desc("The fully qualified name of the symbol."), - cl::cat(ClangRenameOptions)); - -static cl::list<std::string> - NewNames("new-name", cl::desc("The new name to change the symbol to."), - cl::cat(ClangRenameOptions)); -static cl::opt<bool> PrintName( - "pn", - cl::desc("Print the found symbol's name prior to renaming to stderr."), - cl::cat(ClangRenameOptions)); -static cl::opt<bool> PrintLocations( - "pl", cl::desc("Print the locations affected by renaming to stderr."), - cl::cat(ClangRenameOptions)); -static cl::opt<std::string> - ExportFixes("export-fixes", - cl::desc("YAML file to store suggested fixes in."), - cl::value_desc("filename"), cl::cat(ClangRenameOptions)); -static cl::opt<std::string> - Input("input", cl::desc("YAML file to load oldname-newname pairs from."), - cl::Optional, cl::cat(ClangRenameOptions)); -static cl::opt<bool> Force("force", - cl::desc("Ignore nonexistent qualified names."), - cl::cat(ClangRenameOptions)); - -int main(int argc, const char **argv) { - auto ExpectedParser = - tooling::CommonOptionsParser::create(argc, argv, ClangRenameOptions); - if (!ExpectedParser) { - llvm::errs() << ExpectedParser.takeError(); - return 1; - } - tooling::CommonOptionsParser &OP = ExpectedParser.get(); - - if (!Input.empty()) { - // 
Populate QualifiedNames and NewNames from a YAML file. - ErrorOr<std::unique_ptr<MemoryBuffer>> Buffer = - llvm::MemoryBuffer::getFile(Input); - if (!Buffer) { - errs() << "clang-rename: failed to read " << Input << ": " - << Buffer.getError().message() << "\n"; - return 1; - } - - std::vector<RenameAllInfo> Infos; - llvm::yaml::Input YAML(Buffer.get()->getBuffer()); - YAML >> Infos; - for (const auto &Info : Infos) { - if (!Info.QualifiedName.empty()) - QualifiedNames.push_back(Info.QualifiedName); - else - SymbolOffsets.push_back(Info.Offset); - NewNames.push_back(Info.NewName); - } - } - - // Check the arguments for correctness. - if (NewNames.empty()) { - errs() << "clang-rename: -new-name must be specified.\n\n"; - return 1; - } - - if (SymbolOffsets.empty() == QualifiedNames.empty()) { - errs() << "clang-rename: -offset and -qualified-name can't be present at " - "the same time.\n"; - return 1; - } - - // Check if NewNames is a valid identifier in C++17. - LangOptions Options; - Options.CPlusPlus = true; - Options.CPlusPlus17 = true; - IdentifierTable Table(Options); - for (const auto &NewName : NewNames) { - auto NewNameTokKind = Table.get(NewName).getTokenID(); - if (!tok::isAnyIdentifier(NewNameTokKind)) { - errs() << "ERROR: new name is not a valid identifier in C++17.\n\n"; - return 1; - } - } - - if (SymbolOffsets.size() + QualifiedNames.size() != NewNames.size()) { - errs() << "clang-rename: number of symbol offsets(" << SymbolOffsets.size() - << ") + number of qualified names (" << QualifiedNames.size() - << ") must be equal to number of new names(" << NewNames.size() - << ").\n\n"; - cl::PrintHelpMessage(); - return 1; - } - - auto Files = OP.getSourcePathList(); - tooling::RefactoringTool Tool(OP.getCompilations(), Files); - tooling::USRFindingAction FindingAction(SymbolOffsets, QualifiedNames, Force); - Tool.run(tooling::newFrontendActionFactory(&FindingAction).get()); - const std::vector<std::vector<std::string>> &USRList = - 
FindingAction.getUSRList(); - const std::vector<std::string> &PrevNames = FindingAction.getUSRSpellings(); - if (PrintName) { - for (const auto &PrevName : PrevNames) { - outs() << "clang-rename found name: " << PrevName << '\n'; - } - } - - if (FindingAction.errorOccurred()) { - // Diagnostics are already issued at this point. - return 1; - } - - // Perform the renaming. - tooling::RenamingAction RenameAction(NewNames, PrevNames, USRList, - Tool.getReplacements(), PrintLocations); - std::unique_ptr<tooling::FrontendActionFactory> Factory = - tooling::newFrontendActionFactory(&RenameAction); - int ExitCode; - - if (Inplace) { - ExitCode = Tool.runAndSave(Factory.get()); - } else { - ExitCode = Tool.run(Factory.get()); - - if (!ExportFixes.empty()) { - std::error_code EC; - llvm::raw_fd_ostream OS(ExportFixes, EC, llvm::sys::fs::OF_None); - if (EC) { - llvm::errs() << "Error opening output file: " << EC.message() << '\n'; - return 1; - } - - // Export replacements. - tooling::TranslationUnitReplacements TUR; - const auto &FileToReplacements = Tool.getReplacements(); - for (const auto &Entry : FileToReplacements) - TUR.Replacements.insert(TUR.Replacements.end(), Entry.second.begin(), - Entry.second.end()); - - yaml::Output YAML(OS); - YAML << TUR; - OS.close(); - return 0; - } - - // Write every file to stdout. Right now we just barf the files without any - // indication of which files start where, other than that we print the files - // in the same order we see them. 
- LangOptions DefaultLangOptions; - IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions(); - TextDiagnosticPrinter DiagnosticPrinter(errs(), &*DiagOpts); - DiagnosticsEngine Diagnostics( - IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts, - &DiagnosticPrinter, false); - auto &FileMgr = Tool.getFiles(); - SourceManager Sources(Diagnostics, FileMgr); - Rewriter Rewrite(Sources, DefaultLangOptions); - - Tool.applyAllReplacements(Rewrite); - for (const auto &File : Files) { - auto Entry = FileMgr.getOptionalFileRef(File); - if (!Entry) { - errs() << "clang-rename: " << File << " does not exist.\n"; - return 1; - } - const auto ID = Sources.getOrCreateFileID(*Entry, SrcMgr::C_User); - Rewrite.getEditBuffer(ID).write(outs()); - } - } - - return ExitCode; -} diff --git a/clang/tools/clang-rename/clang-rename.el b/clang/tools/clang-rename/clang-rename.el deleted file mode 100644 index 3f47c11e2c75..000000000000 --- a/clang/tools/clang-rename/clang-rename.el +++ /dev/null @@ -1,80 +0,0 @@ -;;; clang-rename.el --- Renames every occurrence of a symbol found at <offset>. -*- lexical-binding: t; -*- - -;; Version: 0.1.0 -;; Keywords: tools, c - -;;; Commentary: - -;; To install clang-rename.el make sure the directory of this file is in your -;; `load-path' and add -;; -;; (require 'clang-rename) -;; -;; to your .emacs configuration. - -;;; Code: - -(defgroup clang-rename nil - "Integration with clang-rename" - :group 'c) - -(defcustom clang-rename-binary "clang-rename" - "Path to clang-rename executable." - :type '(file :must-match t) - :group 'clang-rename) - -;;;###autoload -(defun clang-rename (new-name) - "Rename all instances of the symbol at point to NEW-NAME using clang-rename." - (interactive "sEnter a new name: ") - (save-some-buffers :all) - ;; clang-rename should not be combined with other operations when undoing. 
- (undo-boundary) - (let ((output-buffer (get-buffer-create "*clang-rename*"))) - (with-current-buffer output-buffer (erase-buffer)) - (let ((exit-code (call-process - clang-rename-binary nil output-buffer nil - (format "-offset=%d" - ;; clang-rename wants file (byte) offsets, not - ;; buffer (character) positions. - (clang-rename--bufferpos-to-filepos - ;; Emacs treats one character after a symbol as - ;; part of the symbol, but clang-rename doesn’t. - ;; Use the beginning of the current symbol, if - ;; available, to resolve the inconsistency. - (or (car (bounds-of-thing-at-point 'symbol)) - (point)) - 'exact)) - (format "-new-name=%s" new-name) - "-i" (buffer-file-name)))) - (if (and (integerp exit-code) (zerop exit-code)) - ;; Success; revert current buffer so it gets the modifications. - (progn - (kill-buffer output-buffer) - (revert-buffer :ignore-auto :noconfirm :preserve-modes)) - ;; Failure; append exit code to output buffer and display it. - (let ((message (clang-rename--format-message - "clang-rename failed with %s %s" - (if (integerp exit-code) "exit status" "signal") - exit-code))) - (with-current-buffer output-buffer - (insert ?\n message ?\n)) - (message "%s" message) - (display-buffer output-buffer)))))) - -(defalias 'clang-rename--bufferpos-to-filepos - (if (fboundp 'bufferpos-to-filepos) - 'bufferpos-to-filepos - ;; Emacs 24 doesn’t have ‘bufferpos-to-filepos’, simulate it using - ;; ‘position-bytes’. - (lambda (position &optional _quality _coding-system) - (1- (position-bytes position))))) - -;; ‘format-message’ is new in Emacs 25.1. Provide a fallback for older -;; versions. 
-(defalias 'clang-rename--format-message - (if (fboundp 'format-message) 'format-message 'format)) - -(provide 'clang-rename) - -;;; clang-rename.el ends here diff --git a/clang/tools/clang-rename/clang-rename.py b/clang/tools/clang-rename/clang-rename.py deleted file mode 100644 index 1cbabaf859a5..000000000000 --- a/clang/tools/clang-rename/clang-rename.py +++ /dev/null @@ -1,70 +0,0 @@ -""" -Minimal clang-rename integration with Vim. - -Before installing make sure one of the following is satisfied: - -* clang-rename is in your PATH -* `g:clang_rename_path` in ~/.vimrc points to valid clang-rename executable -* `binary` in clang-rename.py points to valid to clang-rename executable - -To install, simply put this into your ~/.vimrc for python2 support - - noremap <leader>cr :pyf <path-to>/clang-rename.py<cr> - -For python3 use the following command (note the change from :pyf to :py3f) - - noremap <leader>cr :py3f <path-to>/clang-rename.py<cr> - -IMPORTANT NOTE: Before running the tool, make sure you saved the file. - -All you have to do now is to place a cursor on a variable/function/class which -you would like to rename and press '<leader>cr'. You will be prompted for a new -name if the cursor points to a valid symbol. -""" - -from __future__ import absolute_import, division, print_function -import vim -import subprocess -import sys - - -def main(): - binary = "clang-rename" - if vim.eval('exists("g:clang_rename_path")') == "1": - binary = vim.eval("g:clang_rename_path") - - # Get arguments for clang-rename binary. - offset = int(vim.eval('line2byte(line("."))+col(".")')) - 2 - if offset < 0: - print( - "Couldn't determine cursor position. Is your file empty?", file=sys.stderr - ) - return - filename = vim.current.buffer.name - - new_name_request_message = "type new name:" - new_name = vim.eval("input('{}\n')".format(new_name_request_message)) - - # Call clang-rename. 
- command = [ - binary, - filename, - "-i", - "-offset", - str(offset), - "-new-name", - str(new_name), - ] - # FIXME: make it possible to run the tool on unsaved file. - p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = p.communicate() - - if stderr: - print(stderr) - - # Reload all buffers in Vim. - vim.command("checktime") - - -if __name__ == "__main__": - main() diff --git a/clang/unittests/Basic/DiagnosticTest.cpp b/clang/unittests/Basic/DiagnosticTest.cpp index 746901939171..691d74f697f2 100644 --- a/clang/unittests/Basic/DiagnosticTest.cpp +++ b/clang/unittests/Basic/DiagnosticTest.cpp @@ -17,9 +17,6 @@ using namespace llvm; using namespace clang; void clang::DiagnosticsTestHelper(DiagnosticsEngine &diag) { - unsigned delayedDiagID = 0U; - - EXPECT_EQ(diag.DelayedDiagID, delayedDiagID); EXPECT_FALSE(diag.DiagStates.empty()); EXPECT_TRUE(diag.DiagStatesByLoc.empty()); EXPECT_TRUE(diag.DiagStateOnPushStack.empty()); @@ -83,6 +80,21 @@ TEST(DiagnosticTest, fatalsAsError) { } } +TEST(DiagnosticTest, tooManyErrorsIsAlwaysFatal) { + DiagnosticsEngine Diags(new DiagnosticIDs(), new DiagnosticOptions, + new IgnoringDiagConsumer()); + Diags.setFatalsAsError(true); + + // Report a fatal_too_many_errors diagnostic to ensure that still + // acts as a fatal error despite downgrading fatal errors to errors. + Diags.Report(diag::fatal_too_many_errors); + EXPECT_TRUE(Diags.hasFatalErrorOccurred()); + + // Ensure that the severity of that diagnostic is really "fatal". 
+ EXPECT_EQ(Diags.getDiagnosticLevel(diag::fatal_too_many_errors, {}), + DiagnosticsEngine::Level::Fatal); +} + // Check that soft RESET works as intended TEST(DiagnosticTest, softReset) { DiagnosticsEngine Diags(new DiagnosticIDs(), new DiagnosticOptions, @@ -104,7 +116,6 @@ TEST(DiagnosticTest, softReset) { // Check for private variables of DiagnosticsEngine differentiating soft reset DiagnosticsTestHelper(Diags); - EXPECT_FALSE(Diags.isDiagnosticInFlight()); EXPECT_TRUE(Diags.isLastDiagnosticIgnored()); } diff --git a/clang/unittests/CMakeLists.txt b/clang/unittests/CMakeLists.txt index e43ee7bfa88a..85d265426ec8 100644 --- a/clang/unittests/CMakeLists.txt +++ b/clang/unittests/CMakeLists.txt @@ -48,7 +48,6 @@ if(NOT WIN32 AND CLANG_TOOL_LIBCLANG_BUILD) add_subdirectory(libclang) endif() add_subdirectory(DirectoryWatcher) -add_subdirectory(Rename) add_subdirectory(Index) add_subdirectory(InstallAPI) add_subdirectory(Serialization) diff --git a/clang/unittests/Driver/DXCModeTest.cpp b/clang/unittests/Driver/DXCModeTest.cpp index 41ab30bc81d5..2a079a62f1bc 100644 --- a/clang/unittests/Driver/DXCModeTest.cpp +++ b/clang/unittests/Driver/DXCModeTest.cpp @@ -51,7 +51,6 @@ static void validateTargetProfile( EXPECT_TRUE(C); EXPECT_EQ(Diags.getNumErrors(), NumOfErrors); EXPECT_STREQ(DiagConsumer->Errors.back().c_str(), ExpectError.data()); - Diags.Clear(); DiagConsumer->clear(); } @@ -160,7 +159,6 @@ TEST(DxcModeTest, ValidatorVersionValidation) { DiagConsumer->Errors.back().c_str(), "invalid validator version : 0.1; if validator major version is 0, " "minor version must also be 0"); - Diags.Clear(); DiagConsumer->clear(); Args = TheDriver.ParseArgStrings({"-validator-version", "1"}, false, @@ -176,7 +174,6 @@ TEST(DxcModeTest, ValidatorVersionValidation) { EXPECT_STREQ(DiagConsumer->Errors.back().c_str(), "invalid validator version : 1; format of validator version is " "\"<major>.<minor>\" (ex:\"1.4\")"); - Diags.Clear(); DiagConsumer->clear(); Args = 
TheDriver.ParseArgStrings({"-validator-version", "-Tlib_6_7"}, false, @@ -193,7 +190,6 @@ TEST(DxcModeTest, ValidatorVersionValidation) { DiagConsumer->Errors.back().c_str(), "invalid validator version : -Tlib_6_7; format of validator version is " "\"<major>.<minor>\" (ex:\"1.4\")"); - Diags.Clear(); DiagConsumer->clear(); Args = TheDriver.ParseArgStrings({"-validator-version", "foo"}, false, @@ -210,7 +206,6 @@ TEST(DxcModeTest, ValidatorVersionValidation) { DiagConsumer->Errors.back().c_str(), "invalid validator version : foo; format of validator version is " "\"<major>.<minor>\" (ex:\"1.4\")"); - Diags.Clear(); DiagConsumer->clear(); } diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 5ebf0d7068dd..53aa93a7a4fb 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -17282,6 +17282,12 @@ TEST_F(FormatTest, ConfigurableSpacesInParens) { Spaces.SpacesInParens = FormatStyle::SIPO_Custom; Spaces.SpacesInParensOptions = {}; Spaces.SpacesInParensOptions.Other = true; + + EXPECT_FALSE(Spaces.SpacesInParensOptions.InConditionalStatements); + verifyFormat("if (a)\n" + " return;", + Spaces); + Spaces.SpacesInParensOptions.InConditionalStatements = true; verifyFormat("do_something( ::globalVar );", Spaces); verifyFormat("call( x, y, z );", Spaces); @@ -27577,6 +27583,12 @@ TEST_F(FormatTest, InsertNewlineAtEOF) { verifyNoChange("int i;\n", Style); verifyFormat("int i;\n", "int i;", Style); + + constexpr StringRef Code{"namespace {\n" + "int i;\n" + "} // namespace"}; + verifyFormat(Code.str() + '\n', Code, Style, + {tooling::Range(19, 13)}); // line 3 } TEST_F(FormatTest, KeepEmptyLinesAtEOF) { diff --git a/clang/unittests/Format/FormatTestBase.h b/clang/unittests/Format/FormatTestBase.h index 33110ca5d9ed..9d9472964fd3 100644 --- a/clang/unittests/Format/FormatTestBase.h +++ b/clang/unittests/Format/FormatTestBase.h @@ -61,23 +61,23 @@ protected: return *Result; } - FormatStyle 
getStyleWithColumns(FormatStyle Style, unsigned ColumnLimit) { + FormatStyle getStyleWithColumns(FormatStyle Style, + unsigned ColumnLimit) const { Style.ColumnLimit = ColumnLimit; return Style; } - FormatStyle getLLVMStyleWithColumns(unsigned ColumnLimit) { + FormatStyle getLLVMStyleWithColumns(unsigned ColumnLimit) const { return getStyleWithColumns(getLLVMStyle(), ColumnLimit); } - FormatStyle getGoogleStyleWithColumns(unsigned ColumnLimit) { + FormatStyle getGoogleStyleWithColumns(unsigned ColumnLimit) const { return getStyleWithColumns(getGoogleStyle(), ColumnLimit); } - FormatStyle getTextProtoStyleWithColumns(unsigned ColumnLimit) { - FormatStyle Style = getGoogleStyle(FormatStyle::FormatStyle::LK_TextProto); - Style.ColumnLimit = ColumnLimit; - return Style; + FormatStyle getTextProtoStyleWithColumns(unsigned ColumnLimit) const { + return getStyleWithColumns(getGoogleStyle(FormatStyle::LK_TextProto), + ColumnLimit); } bool _verifyFormat(const char *File, int Line, StringRef Expected, diff --git a/clang/unittests/Format/FormatTestProto.cpp b/clang/unittests/Format/FormatTestProto.cpp index 5adb532ae4a4..30ce57c545ec 100644 --- a/clang/unittests/Format/FormatTestProto.cpp +++ b/clang/unittests/Format/FormatTestProto.cpp @@ -516,8 +516,6 @@ TEST_F(FormatTestProto, AcceptsOperatorAsKeyInOptions) { } TEST_F(FormatTestProto, BreaksEntriesOfSubmessagesContainingSubmessages) { - FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto); - Style.ColumnLimit = 60; // The column limit allows for the keys submessage to be put on 1 line, but we // break it since it contains a submessage an another entry. 
verifyFormat("option (MyProto.options) = {\n" diff --git a/clang/unittests/Format/FormatTestTextProto.cpp b/clang/unittests/Format/FormatTestTextProto.cpp index 23f46202a346..fd65c9a58db5 100644 --- a/clang/unittests/Format/FormatTestTextProto.cpp +++ b/clang/unittests/Format/FormatTestTextProto.cpp @@ -18,9 +18,7 @@ namespace { class FormatTestTextProto : public FormatTestBase { protected: virtual FormatStyle getDefaultStyle() const override { - FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto); - Style.ColumnLimit = 60; // To make writing tests easier. - return Style; + return getTextProtoStyleWithColumns(60); } }; @@ -126,7 +124,8 @@ TEST_F(FormatTestTextProto, ImplicitStringLiteralConcatenation) { " 'bbbbb'"); verifyFormat("field_a: \"aaaaa\"\n" " \"bbbbb\""); - FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto); + + auto Style = getDefaultStyle(); Style.AlwaysBreakBeforeMultilineStrings = true; verifyFormat("field_a:\n" " 'aaaaa'\n" @@ -359,46 +358,40 @@ TEST_F(FormatTestTextProto, KeepsCommentsIndentedInList) { } TEST_F(FormatTestTextProto, UnderstandsHashComments) { - FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto); - Style.ColumnLimit = 60; // To make writing tests easier. 
- EXPECT_EQ("aaa: 100\n" - "## this is a double-hash comment.\n" - "bb: 100\n" - "## another double-hash comment.\n" - "### a triple-hash comment\n" - "cc: 200\n" - "### another triple-hash comment\n" - "#### a quadriple-hash comment\n" - "dd: 100\n" - "#### another quadriple-hash comment", - format("aaa: 100\n" - "##this is a double-hash comment.\n" - "bb: 100\n" - "## another double-hash comment.\n" - "###a triple-hash comment\n" - "cc: 200\n" - "### another triple-hash comment\n" - "####a quadriple-hash comment\n" - "dd: 100\n" - "#### another quadriple-hash comment", - Style)); + auto Style = getDefaultStyle(); + + verifyFormat("aaa: 100\n" + "## this is a double-hash comment.\n" + "bb: 100\n" + "## another double-hash comment.\n" + "### a triple-hash comment\n" + "cc: 200\n" + "### another triple-hash comment\n" + "#### a quadriple-hash comment\n" + "dd: 100\n" + "#### another quadriple-hash comment", + "aaa: 100\n" + "##this is a double-hash comment.\n" + "bb: 100\n" + "## another double-hash comment.\n" + "###a triple-hash comment\n" + "cc: 200\n" + "### another triple-hash comment\n" + "####a quadriple-hash comment\n" + "dd: 100\n" + "#### another quadriple-hash comment", + Style); // Ensure we support a common pattern for naming sections. 
- EXPECT_EQ("##############\n" - "# section name\n" - "##############", - format("##############\n" - "# section name\n" - "##############", - Style)); - - EXPECT_EQ("///////////////\n" - "// section name\n" - "///////////////", - format("///////////////\n" - "// section name\n" - "///////////////", - Style)); + verifyFormat("##############\n" + "# section name\n" + "##############", + Style); + + verifyFormat("///////////////\n" + "// section name\n" + "///////////////", + Style); } TEST_F(FormatTestTextProto, FormatsExtensions) { @@ -519,8 +512,8 @@ TEST_F(FormatTestTextProto, FormatsRepeatedListInitializers) { " ]\n" "}\n" "key: value"); - FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto); - Style.ColumnLimit = 60; // To make writing tests easier. + + auto Style = getDefaultStyle(); Style.Cpp11BracedListStyle = true; verifyFormat("keys: [1]", Style); } @@ -544,7 +537,6 @@ TEST_F(FormatTestTextProto, BreaksConsecutiveStringLiterals) { } TEST_F(FormatTestTextProto, PutsMultipleEntriesInExtensionsOnNewlines) { - FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto); verifyFormat("pppppppppp: {\n" " ssssss: \"http://example.com/blahblahblah\"\n" " ppppppp: \"sssss/MMMMMMMMMMMM\"\n" @@ -556,12 +548,10 @@ TEST_F(FormatTestTextProto, PutsMultipleEntriesInExtensionsOnNewlines) { " key: value\n" " }\n" "}", - Style); + getGoogleStyle(FormatStyle::LK_TextProto)); } TEST_F(FormatTestTextProto, BreaksAfterBraceFollowedByClosingBraceOnNextLine) { - FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto); - Style.ColumnLimit = 60; verifyFormat("keys: [\n" " data: { item: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' }\n" "]"); @@ -571,10 +561,6 @@ TEST_F(FormatTestTextProto, BreaksAfterBraceFollowedByClosingBraceOnNextLine) { } TEST_F(FormatTestTextProto, BreaksEntriesOfSubmessagesContainingSubmessages) { - FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto); - Style.ColumnLimit = 60; - // The column limit allows for the keys submessage to be 
put on 1 line, but we - // break it since it contains a submessage an another entry. verifyFormat("key: valueeeeeeee\n" "keys: {\n" " item: 'aaaaaaaaaaaaaaaa'\n" diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index baa5ab0ac5e4..34c03d668a9a 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -781,6 +781,14 @@ TEST_F(TokenAnnotatorTest, UnderstandsCasts) { EXPECT_TOKEN(Tokens[9], tok::r_paren, TT_Unknown); EXPECT_TOKEN(Tokens[10], tok::minus, TT_BinaryOperator); + Tokens = annotate("return (::Type)(1 + 2);"); + ASSERT_EQ(Tokens.size(), 12u) << Tokens; + EXPECT_TOKEN(Tokens[4], tok::r_paren, TT_CastRParen); + + Tokens = annotate("return (Namespace::Class)(1 + 2);"); + ASSERT_EQ(Tokens.size(), 13u) << Tokens; + EXPECT_TOKEN(Tokens[5], tok::r_paren, TT_CastRParen); + auto Style = getLLVMStyle(); Style.TypeNames.push_back("Foo"); Tokens = annotate("#define FOO(bar) foo((Foo)&bar)", Style); diff --git a/clang/unittests/Rename/CMakeLists.txt b/clang/unittests/Rename/CMakeLists.txt deleted file mode 100644 index 6ec0c521551c..000000000000 --- a/clang/unittests/Rename/CMakeLists.txt +++ /dev/null @@ -1,29 +0,0 @@ -set(LLVM_LINK_COMPONENTS - FrontendOpenMP - support - ) - -# We'd like clang/unittests/Tooling/RewriterTestContext.h in the test. 
-include_directories(${CLANG_SOURCE_DIR}) - -add_clang_unittest(ClangRenameTests - RenameClassTest.cpp - RenameEnumTest.cpp - RenameAliasTest.cpp - RenameMemberTest.cpp - RenameFunctionTest.cpp - ) - -clang_target_link_libraries(ClangRenameTests - PRIVATE - clangAST - clangASTMatchers - clangBasic - clangFormat - clangFrontend - clangRewrite - clangSerialization - clangTooling - clangToolingCore - clangToolingRefactoring - ) diff --git a/clang/unittests/Rename/ClangRenameTest.h b/clang/unittests/Rename/ClangRenameTest.h deleted file mode 100644 index 64033657b579..000000000000 --- a/clang/unittests/Rename/ClangRenameTest.h +++ /dev/null @@ -1,116 +0,0 @@ -//===-- ClangRenameTests.cpp - clang-rename unit tests --------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_UNITTESTS_RENAME_CLANGRENAMETEST_H -#define LLVM_CLANG_UNITTESTS_RENAME_CLANGRENAMETEST_H - -#include "unittests/Tooling/RewriterTestContext.h" -#include "clang/ASTMatchers/ASTMatchFinder.h" -#include "clang/Basic/FileManager.h" -#include "clang/Basic/FileSystemOptions.h" -#include "clang/Format/Format.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/PCHContainerOperations.h" -#include "clang/Tooling/Refactoring.h" -#include "clang/Tooling/Refactoring/Rename/RenamingAction.h" -#include "clang/Tooling/Refactoring/Rename/USRFindingAction.h" -#include "clang/Tooling/Tooling.h" -#include "llvm/ADT/IntrusiveRefCntPtr.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/VirtualFileSystem.h" -#include "gtest/gtest.h" -#include <memory> -#include <string> -#include <vector> - -namespace clang { -namespace clang_rename { 
-namespace test { - -struct Case { - std::string Before; - std::string After; - std::string OldName; - std::string NewName; -}; - -class ClangRenameTest : public testing::Test, - public testing::WithParamInterface<Case> { -protected: - void AppendToHeader(StringRef Code) { HeaderContent += Code.str(); } - - std::string runClangRenameOnCode(llvm::StringRef Code, - llvm::StringRef OldName, - llvm::StringRef NewName) { - std::string NewCode; - llvm::raw_string_ostream(NewCode) << llvm::format( - "#include \"%s\"\n%s", HeaderName.c_str(), Code.str().c_str()); - tooling::FileContentMappings FileContents = {{HeaderName, HeaderContent}, - {CCName, NewCode}}; - clang::RewriterTestContext Context; - Context.createInMemoryFile(HeaderName, HeaderContent); - clang::FileID InputFileID = Context.createInMemoryFile(CCName, NewCode); - - tooling::USRFindingAction FindingAction({}, {std::string(OldName)}, false); - std::unique_ptr<tooling::FrontendActionFactory> USRFindingActionFactory = - tooling::newFrontendActionFactory(&FindingAction); - - if (!tooling::runToolOnCodeWithArgs( - USRFindingActionFactory->create(), NewCode, {"-std=c++11"}, CCName, - "clang-rename", std::make_shared<PCHContainerOperations>(), - FileContents)) - return ""; - - const std::vector<std::vector<std::string>> &USRList = - FindingAction.getUSRList(); - std::vector<std::string> NewNames = {std::string(NewName)}; - std::map<std::string, tooling::Replacements> FileToReplacements; - tooling::QualifiedRenamingAction RenameAction(NewNames, USRList, - FileToReplacements); - auto RenameActionFactory = tooling::newFrontendActionFactory(&RenameAction); - if (!tooling::runToolOnCodeWithArgs( - RenameActionFactory->create(), NewCode, {"-std=c++11"}, CCName, - "clang-rename", std::make_shared<PCHContainerOperations>(), - FileContents)) - return ""; - - formatAndApplyAllReplacements(FileToReplacements, Context.Rewrite, "llvm"); - return Context.getRewrittenText(InputFileID); - } - - void CompareSnippets(StringRef 
Expected, StringRef Actual) { - std::string ExpectedCode; - llvm::raw_string_ostream(ExpectedCode) << llvm::format( - "#include \"%s\"\n%s", HeaderName.c_str(), Expected.str().c_str()); - EXPECT_EQ(format(ExpectedCode), format(Actual)); - } - - std::string format(llvm::StringRef Code) { - tooling::Replacements Replaces = format::reformat( - format::getLLVMStyle(), Code, {tooling::Range(0, Code.size())}); - auto ChangedCode = tooling::applyAllReplacements(Code, Replaces); - EXPECT_TRUE(static_cast<bool>(ChangedCode)); - if (!ChangedCode) { - llvm::errs() << llvm::toString(ChangedCode.takeError()); - return ""; - } - return *ChangedCode; - } - - std::string HeaderContent; - std::string HeaderName = "header.h"; - std::string CCName = "input.cc"; -}; - -} // namespace test -} // namespace clang_rename -} // namesdpace clang - -#endif diff --git a/clang/unittests/Rename/RenameAliasTest.cpp b/clang/unittests/Rename/RenameAliasTest.cpp deleted file mode 100644 index 50fa2c104263..000000000000 --- a/clang/unittests/Rename/RenameAliasTest.cpp +++ /dev/null @@ -1,303 +0,0 @@ -//===-- RenameAliasTest.cpp - unit tests for renaming alias ---------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "ClangRenameTest.h" - -namespace clang { -namespace clang_rename { -namespace test { -namespace { - -class RenameAliasTest : public ClangRenameTest { -public: - RenameAliasTest() { - AppendToHeader(R"( - #define MACRO(x) x - namespace some_ns { - class A { - public: - void foo() {} - struct Nested { - enum NestedEnum { - E1, E2, - }; - }; - }; - } // namespace some_ns - namespace a { - typedef some_ns::A TA; - using UA = some_ns::A; - } // namespace a - namespace b { - typedef some_ns::A TA; - using UA = some_ns::A; - } - template <typename T> class ptr {}; - template <typename T> - - using TPtr = ptr<int>; - )"); - } -}; - -INSTANTIATE_TEST_SUITE_P( - RenameAliasTests, RenameAliasTest, - testing::ValuesIn(std::vector<Case>({ - // basic functions - {"void f(a::TA a1) {}", "void f(b::TB a1) {}", "a::TA", "b::TB"}, - {"void f(a::UA a1) {}", "void f(b::UB a1) {}", "a::UA", "b::UB"}, - {"void f(a::TA* a1) {}", "void f(b::TB* a1) {}", "a::TA", "b::TB"}, - {"void f(a::TA** a1) {}", "void f(b::TB** a1) {}", "a::TA", "b::TB"}, - {"a::TA f() { return a::TA(); }", "b::TB f() { return b::TB(); }", - "a::TA", "b::TB"}, - {"a::TA f() { return a::UA(); }", "b::TB f() { return a::UA(); }", - "a::TA", "b::TB"}, - {"a::TA f() { return a::UA(); }", "a::TA f() { return b::UB(); }", - "a::UA", "b::UB"}, - {"void f() { a::TA a; }", "void f() { b::TB a; }", "a::TA", "b::TB"}, - {"void f(const a::TA& a1) {}", "void f(const b::TB& a1) {}", "a::TA", - "b::TB"}, - {"void f(const a::UA& a1) {}", "void f(const b::UB& a1) {}", "a::UA", - "b::UB"}, - {"void f(const a::TA* a1) {}", "void f(const b::TB* a1) {}", "a::TA", - "b::TB"}, - {"namespace a { void f(TA a1) {} }", - "namespace a { void f(b::TB a1) {} }", "a::TA", "b::TB"}, - {"void f(MACRO(a::TA) a1) {}", "void f(MACRO(b::TB) a1) {}", "a::TA", - "b::TB"}, - {"void f(MACRO(a::TA a1)) 
{}", "void f(MACRO(b::TB a1)) {}", "a::TA", - "b::TB"}, - - // shorten/add namespace. - {"namespace b { void f(a::UA a1) {} }", - "namespace b {void f(UB a1) {} }", "a::UA", "b::UB"}, - {"namespace a { void f(UA a1) {} }", - "namespace a {void f(b::UB a1) {} }", "a::UA", "b::UB"}, - - // use namespace and typedefs - {"struct S { using T = a::TA; T a_; };", - "struct S { using T = b::TB; T a_; };", "a::TA", "b::TB"}, - {"using T = a::TA; T gA;", "using T = b::TB; T gA;", "a::TA", "b::TB"}, - {"using T = a::UA; T gA;", "using T = b::UB; T gA;", "a::UA", "b::UB"}, - {"typedef a::TA T; T gA;", "typedef b::TB T; T gA;", "a::TA", "b::TB"}, - {"typedef a::UA T; T gA;", "typedef b::UB T; T gA;", "a::UA", "b::UB"}, - {"typedef MACRO(a::TA) T; T gA;", "typedef MACRO(b::TB) T; T gA;", - "a::TA", "b::TB"}, - - // types in using shadows. - {"using a::TA; TA gA;", "using b::TB; b::TB gA;", "a::TA", "b::TB"}, - {"using a::UA; UA gA;", "using b::UB; b::UB gA;", "a::UA", "b::UB"}, - - // struct members and other oddities - {"struct S : public a::TA {};", "struct S : public b::TB {};", "a::TA", - "b::TB"}, - {"struct S : public a::UA {};", "struct S : public b::UB {};", "a::UA", - "b::UB"}, - {"struct F { void f(a::TA a1) {} };", - "struct F { void f(b::TB a1) {} };", "a::TA", "b::TB"}, - {"struct F { a::TA a_; };", "struct F { b::TB a_; };", "a::TA", - "b::TB"}, - {"struct F { ptr<a::TA> a_; };", "struct F { ptr<b::TB> a_; };", - "a::TA", "b::TB"}, - {"struct F { ptr<a::UA> a_; };", "struct F { ptr<b::UB> a_; };", - "a::UA", "b::UB"}, - - // types in nested name specifiers - {"void f() { a::TA::Nested ne; }", "void f() { b::TB::Nested ne; }", - "a::TA", "b::TB"}, - {"void f() { a::UA::Nested ne; }", "void f() { b::UB::Nested ne; }", - "a::UA", "b::UB"}, - {"void f() { a::TA::Nested::NestedEnum e; }", - "void f() { b::TB::Nested::NestedEnum e; }", "a::TA", "b::TB"}, - {"void f() { auto e = a::TA::Nested::NestedEnum::E1; }", - "void f() { auto e = b::TB::Nested::NestedEnum::E1; }", 
"a::TA", - "b::TB"}, - {"void f() { auto e = a::TA::Nested::E1; }", - "void f() { auto e = b::TB::Nested::E1; }", "a::TA", "b::TB"}, - - // templates - {"template <typename T> struct Foo { T t; }; void f() { Foo<a::TA> " - "foo; }", - "template <typename T> struct Foo { T t; }; void f() { Foo<b::TB> " - "foo; }", - "a::TA", "b::TB"}, - {"template <typename T> struct Foo { a::TA a; };", - "template <typename T> struct Foo { b::TB a; };", "a::TA", "b::TB"}, - {"template <typename T> void f(T t) {} void g() { f<a::TA>(a::TA()); }", - "template <typename T> void f(T t) {} void g() { f<b::TB>(b::TB()); }", - "a::TA", "b::TB"}, - {"template <typename T> void f(T t) {} void g() { f<a::UA>(a::UA()); }", - "template <typename T> void f(T t) {} void g() { f<b::UB>(b::UB()); }", - "a::UA", "b::UB"}, - {"template <typename T> int f() { return 1; } template <> int " - "f<a::TA>() { return 2; } int g() { return f<a::TA>(); }", - "template <typename T> int f() { return 1; } template <> int " - "f<b::TB>() { return 2; } int g() { return f<b::TB>(); }", - "a::TA", "b::TB"}, - {"struct Foo { template <typename T> T foo(); }; void g() { Foo f; " - "auto a = f.template foo<a::TA>(); }", - "struct Foo { template <typename T> T foo(); }; void g() { Foo f; " - "auto a = f.template foo<b::TB>(); }", - "a::TA", "b::TB"}, - {"struct Foo { template <typename T> T foo(); }; void g() { Foo f; " - "auto a = f.template foo<a::UA>(); }", - "struct Foo { template <typename T> T foo(); }; void g() { Foo f; " - "auto a = f.template foo<b::UB>(); }", - "a::UA", "b::UB"}, - - // The following two templates are distilled from regressions found in - // unique_ptr<> and type_traits.h - {"template <typename T> struct outer { typedef T type; type Baz(); }; " - "outer<a::TA> g_A;", - "template <typename T> struct outer { typedef T type; type Baz(); }; " - "outer<b::TB> g_A;", - "a::TA", "b::TB"}, - {"template <typename T> struct nested { typedef T type; }; template " - "<typename T> struct outer { typename 
nested<T>::type Foo(); }; " - "outer<a::TA> g_A;", - "template <typename T> struct nested { typedef T type; }; template " - "<typename T> struct outer { typename nested<T>::type Foo(); }; " - "outer<b::TB> g_A;", - "a::TA", "b::TB"}, - - // macros - {"#define FOO(T, t) T t\nvoid f() { FOO(a::TA, a1); FOO(a::TA, a2); }", - "#define FOO(T, t) T t\nvoid f() { FOO(b::TB, a1); FOO(b::TB, a2); }", - "a::TA", "b::TB"}, - {"#define FOO(n) a::TA n\nvoid f() { FOO(a1); FOO(a2); }", - "#define FOO(n) b::TB n\nvoid f() { FOO(a1); FOO(a2); }", "a::TA", - "b::TB"}, - {"#define FOO(n) a::UA n\nvoid f() { FOO(a1); FOO(a2); }", - "#define FOO(n) b::UB n\nvoid f() { FOO(a1); FOO(a2); }", "a::UA", - "b::UB"}, - - // Pointer to member functions - {"auto gA = &a::TA::foo;", "auto gA = &b::TB::foo;", "a::TA", "b::TB"}, - {"using a::TA; auto gA = &TA::foo;", - "using b::TB; auto gA = &b::TB::foo;", "a::TA", "b::TB"}, - {"typedef a::TA T; auto gA = &T::foo;", - "typedef b::TB T; auto gA = &T::foo;", "a::TA", "b::TB"}, - {"auto gA = &MACRO(a::TA)::foo;", "auto gA = &MACRO(b::TB)::foo;", - "a::TA", "b::TB"}, - - // templated using alias. 
- {"void f(TPtr<int> p) {}", "void f(NewTPtr<int> p) {}", "TPtr", - "NewTPtr"}, - {"void f(::TPtr<int> p) {}", "void f(::NewTPtr<int> p) {}", "TPtr", - "NewTPtr"}, - }))); - -TEST_P(RenameAliasTest, RenameAlias) { - auto Param = GetParam(); - assert(!Param.OldName.empty()); - assert(!Param.NewName.empty()); - std::string Actual = - runClangRenameOnCode(Param.Before, Param.OldName, Param.NewName); - CompareSnippets(Param.After, Actual); -} - -TEST_F(RenameAliasTest, RenameTypedefDefinitions) { - std::string Before = R"( - class X {}; - typedef X TOld; - )"; - std::string Expected = R"( - class X {}; - typedef X TNew; - )"; - std::string After = runClangRenameOnCode(Before, "TOld", "TNew"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameAliasTest, RenameUsingAliasDefinitions) { - std::string Before = R"( - class X {}; - using UOld = X; - )"; - std::string Expected = R"( - class X {}; - using UNew = X; - )"; - std::string After = runClangRenameOnCode(Before, "UOld", "UNew"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameAliasTest, RenameTemplatedAliasDefinitions) { - std::string Before = R"( - template <typename T> - class X { T t; }; - - template <typename T> - using Old = X<T>; - )"; - std::string Expected = R"( - template <typename T> - class X { T t; }; - - template <typename T> - using New = X<T>; - )"; - std::string After = runClangRenameOnCode(Before, "Old", "New"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameAliasTest, RenameAliasesInNamespaces) { - std::string Before = R"( - namespace x { class X {}; } - namespace ns { - using UOld = x::X; - } - )"; - std::string Expected = R"( - namespace x { class X {}; } - namespace ns { - using UNew = x::X; - } - )"; - std::string After = runClangRenameOnCode(Before, "ns::UOld", "ns::UNew"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameAliasTest, AliasesInMacros) { - std::string Before = R"( - namespace x { class Old {}; } - namespace ns { - #define REF(alias) alias alias_var; - - 
#define ALIAS(old) \ - using old##Alias = x::old; \ - REF(old##Alias); - - ALIAS(Old); - - OldAlias old_alias; - } - )"; - std::string Expected = R"( - namespace x { class Old {}; } - namespace ns { - #define REF(alias) alias alias_var; - - #define ALIAS(old) \ - using old##Alias = x::old; \ - REF(old##Alias); - - ALIAS(Old); - - NewAlias old_alias; - } - )"; - std::string After = - runClangRenameOnCode(Before, "ns::OldAlias", "ns::NewAlias"); - CompareSnippets(Expected, After); -} - -} // anonymous namespace -} // namespace test -} // namespace clang_rename -} // namesdpace clang diff --git a/clang/unittests/Rename/RenameClassTest.cpp b/clang/unittests/Rename/RenameClassTest.cpp deleted file mode 100644 index 24370b5795e9..000000000000 --- a/clang/unittests/Rename/RenameClassTest.cpp +++ /dev/null @@ -1,820 +0,0 @@ -//===-- RenameClassTest.cpp - unit tests for renaming classes -------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "ClangRenameTest.h" - -namespace clang { -namespace clang_rename { -namespace test { -namespace { - -class RenameClassTest : public ClangRenameTest { -public: - RenameClassTest() { - AppendToHeader(R"( - namespace a { - class Foo { - public: - struct Nested { - enum NestedEnum {E1, E2}; - }; - void func() {} - static int Constant; - }; - class Goo { - public: - struct Nested { - enum NestedEnum {E1, E2}; - }; - }; - int Foo::Constant = 1; - } // namespace a - namespace b { - class Foo {}; - } // namespace b - - #define MACRO(x) x - - template<typename T> class ptr {}; - )"); - } -}; - -INSTANTIATE_TEST_SUITE_P( - RenameClassTests, RenameClassTest, - testing::ValuesIn(std::vector<Case>({ - // basic classes - {"a::Foo f;", "b::Bar f;", "", ""}, - {"::a::Foo f;", "::b::Bar f;", "", ""}, - {"void f(a::Foo f) {}", "void f(b::Bar f) {}", "", ""}, - {"void f(a::Foo *f) {}", "void f(b::Bar *f) {}", "", ""}, - {"a::Foo f() { return a::Foo(); }", "b::Bar f() { return b::Bar(); }", - "", ""}, - {"namespace a {a::Foo f() { return Foo(); }}", - "namespace a {b::Bar f() { return b::Bar(); }}", "", ""}, - {"void f(const a::Foo& a1) {}", "void f(const b::Bar& a1) {}", "", ""}, - {"void f(const a::Foo* a1) {}", "void f(const b::Bar* a1) {}", "", ""}, - {"namespace a { void f(Foo a1) {} }", - "namespace a { void f(b::Bar a1) {} }", "", ""}, - {"void f(MACRO(a::Foo) a1) {}", "void f(MACRO(b::Bar) a1) {}", "", ""}, - {"void f(MACRO(a::Foo a1)) {}", "void f(MACRO(b::Bar a1)) {}", "", ""}, - {"a::Foo::Nested ns;", "b::Bar::Nested ns;", "", ""}, - {"auto t = a::Foo::Constant;", "auto t = b::Bar::Constant;", "", ""}, - {"a::Foo::Nested ns;", "a::Foo::Nested2 ns;", "a::Foo::Nested", - "a::Foo::Nested2"}, - - // use namespace and typedefs - {"using a::Foo; Foo gA;", "using b::Bar; b::Bar gA;", "", ""}, - {"using a::Foo; void f(Foo gA) 
{}", "using b::Bar; void f(Bar gA) {}", - "", ""}, - {"using a::Foo; namespace x { Foo gA; }", - "using b::Bar; namespace x { Bar gA; }", "", ""}, - {"struct S { using T = a::Foo; T a_; };", - "struct S { using T = b::Bar; T a_; };", "", ""}, - {"using T = a::Foo; T gA;", "using T = b::Bar; T gA;", "", ""}, - {"typedef a::Foo T; T gA;", "typedef b::Bar T; T gA;", "", ""}, - {"typedef MACRO(a::Foo) T; T gA;", "typedef MACRO(b::Bar) T; T gA;", "", - ""}, - - // struct members and other oddities - {"struct S : public a::Foo {};", "struct S : public b::Bar {};", "", - ""}, - {"struct F { void f(a::Foo a1) {} };", - "struct F { void f(b::Bar a1) {} };", "", ""}, - {"struct F { a::Foo a_; };", "struct F { b::Bar a_; };", "", ""}, - {"struct F { ptr<a::Foo> a_; };", "struct F { ptr<b::Bar> a_; };", "", - ""}, - - {"void f() { a::Foo::Nested ne; }", "void f() { b::Bar::Nested ne; }", - "", ""}, - {"void f() { a::Goo::Nested ne; }", "void f() { a::Goo::Nested ne; }", - "", ""}, - {"void f() { a::Foo::Nested::NestedEnum e; }", - "void f() { b::Bar::Nested::NestedEnum e; }", "", ""}, - {"void f() { auto e = a::Foo::Nested::NestedEnum::E1; }", - "void f() { auto e = b::Bar::Nested::NestedEnum::E1; }", "", ""}, - {"void f() { auto e = a::Foo::Nested::E1; }", - "void f() { auto e = b::Bar::Nested::E1; }", "", ""}, - - // templates - {"template <typename T> struct Foo { T t; };\n" - "void f() { Foo<a::Foo> foo; }", - "template <typename T> struct Foo { T t; };\n" - "void f() { Foo<b::Bar> foo; }", - "", ""}, - {"template <typename T> struct Foo { a::Foo a; };", - "template <typename T> struct Foo { b::Bar a; };", "", ""}, - {"template <typename T> void f(T t) {}\n" - "void g() { f<a::Foo>(a::Foo()); }", - "template <typename T> void f(T t) {}\n" - "void g() { f<b::Bar>(b::Bar()); }", - "", ""}, - {"template <typename T> int f() { return 1; }\n" - "template <> int f<a::Foo>() { return 2; }\n" - "int g() { return f<a::Foo>(); }", - "template <typename T> int f() { return 1; }\n" - 
"template <> int f<b::Bar>() { return 2; }\n" - "int g() { return f<b::Bar>(); }", - "", ""}, - {"struct Foo { template <typename T> T foo(); };\n" - "void g() { Foo f; auto a = f.template foo<a::Foo>(); }", - "struct Foo { template <typename T> T foo(); };\n" - "void g() { Foo f; auto a = f.template foo<b::Bar>(); }", - "", ""}, - - // The following two templates are distilled from regressions found in - // unique_ptr<> and type_traits.h - {"template <typename T> struct outer {\n" - " typedef T type;\n" - " type Baz();\n" - " };\n" - " outer<a::Foo> g_A;", - "template <typename T> struct outer {\n" - " typedef T type;\n" - " type Baz();\n" - " };\n" - " outer<b::Bar> g_A;", - "", ""}, - {"template <typename T> struct nested { typedef T type; };\n" - "template <typename T> struct outer { typename nested<T>::type Foo(); " - "};\n" - "outer<a::Foo> g_A;", - "template <typename T> struct nested { typedef T type; };\n" - "template <typename T> struct outer { typename nested<T>::type Foo(); " - "};\n" - "outer<b::Bar> g_A;", - "", ""}, - - // macros - {"#define FOO(T, t) T t\n" - "void f() { FOO(a::Foo, a1); FOO(a::Foo, a2); }", - "#define FOO(T, t) T t\n" - "void f() { FOO(b::Bar, a1); FOO(b::Bar, a2); }", - "", ""}, - {"#define FOO(n) a::Foo n\n" - " void f() { FOO(a1); FOO(a2); }", - "#define FOO(n) b::Bar n\n" - " void f() { FOO(a1); FOO(a2); }", - "", ""}, - - // Pointer to member functions - {"auto gA = &a::Foo::func;", "auto gA = &b::Bar::func;", "", ""}, - {"using a::Foo; auto gA = &Foo::func;", - "using b::Bar; auto gA = &b::Bar::func;", "", ""}, - {"using a::Foo; namespace x { auto gA = &Foo::func; }", - "using b::Bar; namespace x { auto gA = &Bar::func; }", "", ""}, - {"typedef a::Foo T; auto gA = &T::func;", - "typedef b::Bar T; auto gA = &T::func;", "", ""}, - {"auto gA = &MACRO(a::Foo)::func;", "auto gA = &MACRO(b::Bar)::func;", - "", ""}, - - // Short match inside a namespace - {"namespace a { void f(Foo a1) {} }", - "namespace a { void f(b::Bar a1) {} 
}", "", ""}, - - // Correct match. - {"using a::Foo; struct F { ptr<Foo> a_; };", - "using b::Bar; struct F { ptr<Bar> a_; };", "", ""}, - - // avoid false positives - {"void f(b::Foo a) {}", "void f(b::Foo a) {}", "", ""}, - {"namespace b { void f(Foo a) {} }", "namespace b { void f(Foo a) {} }", - "", ""}, - - // friends, everyone needs friends. - {"class Foo { int i; friend class a::Foo; };", - "class Foo { int i; friend class b::Bar; };", "", ""}, - })) ); - -TEST_P(RenameClassTest, RenameClasses) { - auto Param = GetParam(); - std::string OldName = Param.OldName.empty() ? "a::Foo" : Param.OldName; - std::string NewName = Param.NewName.empty() ? "b::Bar" : Param.NewName; - std::string Actual = runClangRenameOnCode(Param.Before, OldName, NewName); - CompareSnippets(Param.After, Actual); -} - -class NamespaceDetectionTest : public ClangRenameTest { -protected: - NamespaceDetectionTest() { - AppendToHeader(R"( - class Old {}; - namespace o1 { - class Old {}; - namespace o2 { - class Old {}; - namespace o3 { - class Old {}; - } // namespace o3 - } // namespace o2 - } // namespace o1 - )"); - } -}; - -INSTANTIATE_TEST_SUITE_P( - RenameClassTest, NamespaceDetectionTest, - ::testing::ValuesIn(std::vector<Case>({ - // Test old and new namespace overlap. 
- {"namespace o1 { namespace o2 { namespace o3 { Old moo; } } }", - "namespace o1 { namespace o2 { namespace o3 { New moo; } } }", - "o1::o2::o3::Old", "o1::o2::o3::New"}, - {"namespace o1 { namespace o2 { namespace o3 { Old moo; } } }", - "namespace o1 { namespace o2 { namespace o3 { n3::New moo; } } }", - "o1::o2::o3::Old", "o1::o2::n3::New"}, - {"namespace o1 { namespace o2 { namespace o3 { Old moo; } } }", - "namespace o1 { namespace o2 { namespace o3 { n2::n3::New moo; } } }", - "o1::o2::o3::Old", "o1::n2::n3::New"}, - {"namespace o1 { namespace o2 { Old moo; } }", - "namespace o1 { namespace o2 { New moo; } }", "::o1::o2::Old", - "::o1::o2::New"}, - {"namespace o1 { namespace o2 { Old moo; } }", - "namespace o1 { namespace o2 { n2::New moo; } }", "::o1::o2::Old", - "::o1::n2::New"}, - {"namespace o1 { namespace o2 { Old moo; } }", - "namespace o1 { namespace o2 { ::n1::n2::New moo; } }", - "::o1::o2::Old", "::n1::n2::New"}, - {"namespace o1 { namespace o2 { Old moo; } }", - "namespace o1 { namespace o2 { n1::n2::New moo; } }", "::o1::o2::Old", - "n1::n2::New"}, - - // Test old and new namespace with differing depths. 
- {"namespace o1 { namespace o2 { namespace o3 { Old moo; } } }", - "namespace o1 { namespace o2 { namespace o3 { New moo; } } }", - "o1::o2::o3::Old", "::o1::New"}, - {"namespace o1 { namespace o2 { namespace o3 { Old moo; } } }", - "namespace o1 { namespace o2 { namespace o3 { New moo; } } }", - "o1::o2::o3::Old", "::o1::o2::New"}, - {"namespace o1 { namespace o2 { namespace o3 { Old moo; } } }", - "namespace o1 { namespace o2 { namespace o3 { New moo; } } }", - "o1::o2::o3::Old", "o1::New"}, - {"namespace o1 { namespace o2 { namespace o3 { Old moo; } } }", - "namespace o1 { namespace o2 { namespace o3 { New moo; } } }", - "o1::o2::o3::Old", "o1::o2::New"}, - {"Old moo;", "o1::New moo;", "::Old", "o1::New"}, - {"Old moo;", "o1::New moo;", "Old", "o1::New"}, - {"namespace o1 { ::Old moo; }", "namespace o1 { New moo; }", "Old", - "o1::New"}, - {"namespace o1 { namespace o2 { Old moo; } }", - "namespace o1 { namespace o2 { ::New moo; } }", "::o1::o2::Old", - "::New"}, - {"namespace o1 { namespace o2 { Old moo; } }", - "namespace o1 { namespace o2 { New moo; } }", "::o1::o2::Old", "New"}, - - // Test moving into the new namespace at different levels. 
- {"namespace n1 { namespace n2 { o1::o2::Old moo; } }", - "namespace n1 { namespace n2 { New moo; } }", "::o1::o2::Old", - "::n1::n2::New"}, - {"namespace n1 { namespace n2 { o1::o2::Old moo; } }", - "namespace n1 { namespace n2 { New moo; } }", "::o1::o2::Old", - "n1::n2::New"}, - {"namespace n1 { namespace n2 { o1::o2::Old moo; } }", - "namespace n1 { namespace n2 { o2::New moo; } }", "::o1::o2::Old", - "::n1::o2::New"}, - {"namespace n1 { namespace n2 { o1::o2::Old moo; } }", - "namespace n1 { namespace n2 { o2::New moo; } }", "::o1::o2::Old", - "n1::o2::New"}, - {"namespace n1 { namespace n2 { o1::o2::Old moo; } }", - "namespace n1 { namespace n2 { ::o1::o2::New moo; } }", - "::o1::o2::Old", "::o1::o2::New"}, - {"namespace n1 { namespace n2 { o1::o2::Old moo; } }", - "namespace n1 { namespace n2 { o1::o2::New moo; } }", "::o1::o2::Old", - "o1::o2::New"}, - - // Test friends declarations. - {"class Foo { friend class o1::Old; };", - "class Foo { friend class o1::New; };", "o1::Old", "o1::New"}, - {"class Foo { int i; friend class o1::Old; };", - "class Foo { int i; friend class ::o1::New; };", "::o1::Old", - "::o1::New"}, - {"namespace o1 { class Foo { int i; friend class Old; }; }", - "namespace o1 { class Foo { int i; friend class New; }; }", "o1::Old", - "o1::New"}, - {"namespace o1 { class Foo { int i; friend class Old; }; }", - "namespace o1 { class Foo { int i; friend class New; }; }", - "::o1::Old", "::o1::New"}, - })) ); - -TEST_P(NamespaceDetectionTest, RenameClasses) { - auto Param = GetParam(); - std::string Actual = - runClangRenameOnCode(Param.Before, Param.OldName, Param.NewName); - CompareSnippets(Param.After, Actual); -} - -class TemplatedClassRenameTest : public ClangRenameTest { -protected: - TemplatedClassRenameTest() { - AppendToHeader(R"( - template <typename T> struct Old { - T t_; - T f() { return T(); }; - static T s(T t) { return t; } - }; - namespace ns { - template <typename T> struct Old { - T t_; - T f() { return T(); }; - static T 
s(T t) { return t; } - }; - } // namespace ns - - namespace o1 { - namespace o2 { - namespace o3 { - template <typename T> struct Old { - T t_; - T f() { return T(); }; - static T s(T t) { return t; } - }; - } // namespace o3 - } // namespace o2 - } // namespace o1 - )"); - } -}; - -INSTANTIATE_TEST_SUITE_P( - RenameClassTests, TemplatedClassRenameTest, - ::testing::ValuesIn(std::vector<Case>({ - {"Old<int> gI; Old<bool> gB;", "New<int> gI; New<bool> gB;", "Old", - "New"}, - {"ns::Old<int> gI; ns::Old<bool> gB;", - "ns::New<int> gI; ns::New<bool> gB;", "ns::Old", "ns::New"}, - {"auto gI = &Old<int>::f; auto gB = &Old<bool>::f;", - "auto gI = &New<int>::f; auto gB = &New<bool>::f;", "Old", "New"}, - {"auto gI = &ns::Old<int>::f;", "auto gI = &ns::New<int>::f;", - "ns::Old", "ns::New"}, - - {"int gI = Old<int>::s(0); bool gB = Old<bool>::s(false);", - "int gI = New<int>::s(0); bool gB = New<bool>::s(false);", "Old", - "New"}, - {"int gI = ns::Old<int>::s(0); bool gB = ns::Old<bool>::s(false);", - "int gI = ns::New<int>::s(0); bool gB = ns::New<bool>::s(false);", - "ns::Old", "ns::New"}, - - {"struct S { Old<int*> o_; };", "struct S { New<int*> o_; };", "Old", - "New"}, - {"struct S { ns::Old<int*> o_; };", "struct S { ns::New<int*> o_; };", - "ns::Old", "ns::New"}, - - {"auto a = reinterpret_cast<Old<int>*>(new Old<int>);", - "auto a = reinterpret_cast<New<int>*>(new New<int>);", "Old", "New"}, - {"auto a = reinterpret_cast<ns::Old<int>*>(new ns::Old<int>);", - "auto a = reinterpret_cast<ns::New<int>*>(new ns::New<int>);", - "ns::Old", "ns::New"}, - {"auto a = reinterpret_cast<const Old<int>*>(new Old<int>);", - "auto a = reinterpret_cast<const New<int>*>(new New<int>);", "Old", - "New"}, - {"auto a = reinterpret_cast<const ns::Old<int>*>(new ns::Old<int>);", - "auto a = reinterpret_cast<const ns::New<int>*>(new ns::New<int>);", - "ns::Old", "ns::New"}, - - {"Old<bool>& foo();", "New<bool>& foo();", "Old", "New"}, - {"ns::Old<bool>& foo();", "ns::New<bool>& foo();", 
"ns::Old", - "ns::New"}, - {"o1::o2::o3::Old<bool>& foo();", "o1::o2::o3::New<bool>& foo();", - "o1::o2::o3::Old", "o1::o2::o3::New"}, - {"namespace ns { Old<bool>& foo(); }", - "namespace ns { New<bool>& foo(); }", "ns::Old", "ns::New"}, - {"const Old<bool>& foo();", "const New<bool>& foo();", "Old", "New"}, - {"const ns::Old<bool>& foo();", "const ns::New<bool>& foo();", - "ns::Old", "ns::New"}, - - // FIXME: figure out why this only works when Moo gets - // specialized at some point. - {"template <typename T> struct Moo { Old<T> o_; }; Moo<int> m;", - "template <typename T> struct Moo { New<T> o_; }; Moo<int> m;", "Old", - "New"}, - {"template <typename T> struct Moo { ns::Old<T> o_; }; Moo<int> m;", - "template <typename T> struct Moo { ns::New<T> o_; }; Moo<int> m;", - "ns::Old", "ns::New"}, - })) ); - -TEST_P(TemplatedClassRenameTest, RenameTemplateClasses) { - auto Param = GetParam(); - std::string Actual = - runClangRenameOnCode(Param.Before, Param.OldName, Param.NewName); - CompareSnippets(Param.After, Actual); -} - -TEST_F(ClangRenameTest, RenameClassWithOutOfLineMembers) { - std::string Before = R"( - class Old { - public: - Old(); - ~Old(); - - Old* next(); - - private: - Old* next_; - }; - - Old::Old() {} - Old::~Old() {} - Old* Old::next() { return next_; } - )"; - std::string Expected = R"( - class New { - public: - New(); - ~New(); - - New* next(); - - private: - New* next_; - }; - - New::New() {} - New::~New() {} - New* New::next() { return next_; } - )"; - std::string After = runClangRenameOnCode(Before, "Old", "New"); - CompareSnippets(Expected, After); -} - -TEST_F(ClangRenameTest, RenameClassWithInlineMembers) { - std::string Before = R"( - class Old { - public: - Old() {} - ~Old() {} - - Old* next() { return next_; } - - private: - Old* next_; - }; - )"; - std::string Expected = R"( - class New { - public: - New() {} - ~New() {} - - New* next() { return next_; } - - private: - New* next_; - }; - )"; - std::string After = 
runClangRenameOnCode(Before, "Old", "New"); - CompareSnippets(Expected, After); -} - -TEST_F(ClangRenameTest, RenameClassWithNamespaceWithInlineMembers) { - std::string Before = R"( - namespace ns { - class Old { - public: - Old() {} - ~Old() {} - - Old* next() { return next_; } - - private: - Old* next_; - }; - } // namespace ns - )"; - std::string Expected = R"( - namespace ns { - class New { - public: - New() {} - ~New() {} - - New* next() { return next_; } - - private: - New* next_; - }; - } // namespace ns - )"; - std::string After = runClangRenameOnCode(Before, "ns::Old", "ns::New"); - CompareSnippets(Expected, After); -} - -TEST_F(ClangRenameTest, RenameClassWithNamespaceWithOutOfInlineMembers) { - std::string Before = R"( - namespace ns { - class Old { - public: - Old(); - ~Old(); - - Old* next(); - - private: - Old* next_; - }; - - Old::Old() {} - Old::~Old() {} - Old* Old::next() { return next_; } - } // namespace ns - )"; - std::string Expected = R"( - namespace ns { - class New { - public: - New(); - ~New(); - - New* next(); - - private: - New* next_; - }; - - New::New() {} - New::~New() {} - New* New::next() { return next_; } - } // namespace ns - )"; - std::string After = runClangRenameOnCode(Before, "ns::Old", "ns::New"); - CompareSnippets(Expected, After); -} - -TEST_F(ClangRenameTest, RenameClassInInheritedConstructor) { - // `using Base::Base;` will generate an implicit constructor containing usage - // of `::ns::Old` which should not be matched. 
- std::string Before = R"( - namespace ns { - class Old; - class Old { - int x; - }; - class Base { - protected: - Old *moo_; - public: - Base(Old *moo) : moo_(moo) {} - }; - class Derived : public Base { - public: - using Base::Base; - }; - } // namespace ns - int main() { - ::ns::Old foo; - ::ns::Derived d(&foo); - return 0; - })"; - std::string Expected = R"( - namespace ns { - class New; - class New { - int x; - }; - class Base { - protected: - New *moo_; - public: - Base(New *moo) : moo_(moo) {} - }; - class Derived : public Base { - public: - using Base::Base; - }; - } // namespace ns - int main() { - ::ns::New foo; - ::ns::Derived d(&foo); - return 0; - })"; - std::string After = runClangRenameOnCode(Before, "ns::Old", "ns::New"); - CompareSnippets(Expected, After); -} - -TEST_F(ClangRenameTest, DontRenameReferencesInImplicitFunction) { - std::string Before = R"( - namespace ns { - class Old { - }; - } // namespace ns - struct S { - int y; - ns::Old old; - }; - void f() { - S s1, s2, s3; - // This causes an implicit assignment operator to be created. - s1 = s2 = s3; - } - )"; - std::string Expected = R"( - namespace ns { - class New { - }; - } // namespace ns - struct S { - int y; - ::new_ns::New old; - }; - void f() { - S s1, s2, s3; - // This causes an implicit assignment operator to be created. - s1 = s2 = s3; - } - )"; - std::string After = runClangRenameOnCode(Before, "ns::Old", "::new_ns::New"); - CompareSnippets(Expected, After); -} - -TEST_F(ClangRenameTest, ReferencesInLambdaFunctionParameters) { - std::string Before = R"( - template <class T> - class function; - template <class R, class... ArgTypes> - class function<R(ArgTypes...)> { - public: - template <typename Functor> - function(Functor f) {} - - function() {} - - R operator()(ArgTypes...) 
const {} - }; - - namespace ns { - class Old {}; - void f() { - function<void(Old)> func; - } - } // namespace ns)"; - std::string Expected = R"( - template <class T> - class function; - template <class R, class... ArgTypes> - class function<R(ArgTypes...)> { - public: - template <typename Functor> - function(Functor f) {} - - function() {} - - R operator()(ArgTypes...) const {} - }; - - namespace ns { - class New {}; - void f() { - function<void(::new_ns::New)> func; - } - } // namespace ns)"; - std::string After = runClangRenameOnCode(Before, "ns::Old", "::new_ns::New"); - CompareSnippets(Expected, After); -} - -TEST_F(ClangRenameTest, DontChangeIfSameName) { - std::string Before = R"( - namespace foo { - class Old { - public: - static void foo() {} - }; - } - - void f(foo::Old * x) { - foo::Old::foo() ; - } - using foo::Old;)"; - std::string Expected = R"( - namespace foo { - class Old { - public: - static void foo() {} - }; - } - - void f(foo::Old * x) { - foo::Old::foo() ; - } - using foo::Old;)"; - std::string After = runClangRenameOnCode(Before, "foo::Old", "foo::Old"); - CompareSnippets(Expected, After); -} - -TEST_F(ClangRenameTest, ChangeIfNewNameWithLeadingDotDot) { - std::string Before = R"( - namespace foo { - class Old { - public: - static void foo() {} - }; - } - - void f(foo::Old * x) { - foo::Old::foo() ; - } - using foo::Old;)"; - std::string Expected = R"( - namespace foo { - class Old { - public: - static void foo() {} - }; - } - - void f(::foo::Old * x) { - ::foo::Old::foo() ; - } - using ::foo::Old;)"; - std::string After = runClangRenameOnCode(Before, "foo::Old", "::foo::Old"); - CompareSnippets(Expected, After); -} - -TEST_F(ClangRenameTest, ChangeIfSameNameWithLeadingDotDot) { - std::string Before = R"( - namespace foo { - class Old { - public: - static void foo() {} - }; - } - - void f(foo::Old * x) { - foo::Old::foo() ; - } - using foo::Old;)"; - std::string Expected = R"( - namespace foo { - class Old { - public: - static void foo() {} - 
}; - } - - void f(::foo::Old * x) { - ::foo::Old::foo() ; - } - using ::foo::Old;)"; - std::string After = runClangRenameOnCode(Before, "::foo::Old", "::foo::Old"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameClassTest, UsingAlias) { - std::string Before = R"( - namespace a { struct A {}; } - - namespace foo { - using Alias = a::A; - Alias a; - })"; - std::string Expected = R"( - namespace a { struct B {}; } - - namespace foo { - using Alias = b::B; - Alias a; - })"; - std::string After = runClangRenameOnCode(Before, "a::A", "b::B"); - CompareSnippets(Expected, After); -} - -TEST_F(ClangRenameTest, FieldDesignatedInitializers) { - std::string Before = R"( - struct S { - int a; - }; - void foo() { - S s = { .a = 10 }; - s.a = 20; - })"; - std::string Expected = R"( - struct S { - int b; - }; - void foo() { - S s = { .b = 10 }; - s.b = 20; - })"; - std::string After = runClangRenameOnCode(Before, "S::a", "S::b"); - CompareSnippets(Expected, After); -} - -// FIXME: investigate why the test fails when adding a new USR to the USRSet. 
-TEST_F(ClangRenameTest, DISABLED_NestedTemplates) { - std::string Before = R"( - namespace a { template <typename T> struct A {}; } - a::A<a::A<int>> foo;)"; - std::string Expected = R"( - namespace a { template <typename T> struct B {}; } - b::B<b::B<int>> foo;)"; - std::string After = runClangRenameOnCode(Before, "a::A", "b::B"); - CompareSnippets(Expected, After); -} - - -} // anonymous namespace -} // namespace test -} // namespace clang_rename -} // namesdpace clang diff --git a/clang/unittests/Rename/RenameEnumTest.cpp b/clang/unittests/Rename/RenameEnumTest.cpp deleted file mode 100644 index dc3440047c4a..000000000000 --- a/clang/unittests/Rename/RenameEnumTest.cpp +++ /dev/null @@ -1,189 +0,0 @@ -#include "ClangRenameTest.h" - -namespace clang { -namespace clang_rename { -namespace test { -namespace { - -class RenameEnumTest : public ClangRenameTest { -public: - RenameEnumTest() { - AppendToHeader(R"( - #define MACRO(x) x - namespace a { - enum A1 { Red }; - enum class A2 { Blue }; - struct C { - enum NestedEnum { White }; - enum class NestedScopedEnum { Black }; - }; - namespace d { - enum A3 { Orange }; - } // namespace d - enum A4 { Pink }; - } // namespace a - enum A5 { Green };)"); - } -}; - -INSTANTIATE_TEST_SUITE_P( - RenameEnumTests, RenameEnumTest, - testing::ValuesIn(std::vector<Case>({ - {"void f(a::A2 arg) { a::A2 t = a::A2::Blue; }", - "void f(b::B2 arg) { b::B2 t = b::B2::Blue; }", "a::A2", "b::B2"}, - {"void f() { a::A1* t1; }", "void f() { b::B1* t1; }", "a::A1", - "b::B1"}, - {"void f() { a::A2* t1; }", "void f() { b::B2* t1; }", "a::A2", - "b::B2"}, - {"void f() { enum a::A2 t = a::A2::Blue; }", - "void f() { enum b::B2 t = b::B2::Blue; }", "a::A2", "b::B2"}, - {"void f() { enum a::A2 t = a::A2::Blue; }", - "void f() { enum b::B2 t = b::B2::Blue; }", "a::A2", "b::B2"}, - - {"void f() { a::A1 t = a::Red; }", "void f() { b::B1 t = b::B1::Red; }", - "a::A1", "b::B1"}, - {"void f() { a::A1 t = a::A1::Red; }", - "void f() { b::B1 t = 
b::B1::Red; }", "a::A1", "b::B1"}, - {"void f() { auto t = a::Red; }", "void f() { auto t = b::B1::Red; }", - "a::A1", "b::B1"}, - {"namespace b { void f() { a::A1 t = a::Red; } }", - "namespace b { void f() { B1 t = B1::Red; } }", "a::A1", "b::B1"}, - {"void f() { a::d::A3 t = a::d::Orange; }", - "void f() { a::b::B3 t = a::b::B3::Orange; }", "a::d::A3", "a::b::B3"}, - {"namespace a { void f() { a::d::A3 t = a::d::Orange; } }", - "namespace a { void f() { b::B3 t = b::B3::Orange; } }", "a::d::A3", - "a::b::B3"}, - {"void f() { A5 t = Green; }", "void f() { B5 t = Green; }", "A5", - "B5"}, - // FIXME: the new namespace qualifier should be added to the unscoped - // enum constant. - {"namespace a { void f() { auto t = Green; } }", - "namespace a { void f() { auto t = Green; } }", "a::A1", "b::B1"}, - - // namespace qualifiers - {"namespace a { void f(A1 a1) {} }", - "namespace a { void f(b::B1 a1) {} }", "a::A1", "b::B1"}, - {"namespace a { void f(A2 a2) {} }", - "namespace a { void f(b::B2 a2) {} }", "a::A2", "b::B2"}, - {"namespace b { void f(a::A1 a1) {} }", - "namespace b { void f(B1 a1) {} }", "a::A1", "b::B1"}, - {"namespace b { void f(a::A2 a2) {} }", - "namespace b { void f(B2 a2) {} }", "a::A2", "b::B2"}, - - // nested enums - {"void f() { a::C::NestedEnum t = a::C::White; }", - "void f() { a::C::NewNestedEnum t = a::C::NewNestedEnum::White; }", - "a::C::NestedEnum", "a::C::NewNestedEnum"}, - {"void f() { a::C::NestedScopedEnum t = a::C::NestedScopedEnum::Black; " - "}", - "void f() { a::C::NewNestedScopedEnum t = " - "a::C::NewNestedScopedEnum::Black; }", - "a::C::NestedScopedEnum", "a::C::NewNestedScopedEnum"}, - - // macros - {"void f(MACRO(a::A1) a1) {}", "void f(MACRO(b::B1) a1) {}", "a::A1", - "b::B1"}, - {"void f(MACRO(a::A2) a2) {}", "void f(MACRO(b::B2) a2) {}", "a::A2", - "b::B2"}, - {"#define FOO(T, t) T t\nvoid f() { FOO(a::A1, a1); }", - "#define FOO(T, t) T t\nvoid f() { FOO(b::B1, a1); }", "a::A1", - "b::B1"}, - {"#define FOO(T, t) T t\nvoid 
f() { FOO(a::A2, a2); }", - "#define FOO(T, t) T t\nvoid f() { FOO(b::B2, a2); }", "a::A2", - "b::B2"}, - {"#define FOO(n) a::A1 n\nvoid f() { FOO(a1); FOO(a2); }", - "#define FOO(n) b::B1 n\nvoid f() { FOO(a1); FOO(a2); }", "a::A1", - "b::B1"}, - - // using and type alias - {"using a::A1; A1 gA;", "using b::B1; b::B1 gA;", "a::A1", "b::B1"}, - {"using a::A2; A2 gA;", "using b::B2; b::B2 gA;", "a::A2", "b::B2"}, - {"struct S { using T = a::A1; T a_; };", - "struct S { using T = b::B1; T a_; };", "a::A1", "b::B1"}, - {"using T = a::A1; T gA;", "using T = b::B1; T gA;", "a::A1", "b::B1"}, - {"using T = a::A2; T gA;", "using T = b::B2; T gA;", "a::A2", "b::B2"}, - {"typedef a::A1 T; T gA;", "typedef b::B1 T; T gA;", "a::A1", "b::B1"}, - {"typedef a::A2 T; T gA;", "typedef b::B2 T; T gA;", "a::A2", "b::B2"}, - {"typedef MACRO(a::A1) T; T gA;", "typedef MACRO(b::B1) T; T gA;", - "a::A1", "b::B1"}, - - // templates - {"template<typename T> struct Foo { T t; }; void f() { Foo<a::A1> " - "foo1; }", - "template<typename T> struct Foo { T t; }; void f() { Foo<b::B1> " - "foo1; }", - "a::A1", "b::B1"}, - {"template<typename T> struct Foo { T t; }; void f() { Foo<a::A2> " - "foo2; }", - "template<typename T> struct Foo { T t; }; void f() { Foo<b::B2> " - "foo2; }", - "a::A2", "b::B2"}, - {"template<typename T> struct Foo { a::A1 a1; };", - "template<typename T> struct Foo { b::B1 a1; };", "a::A1", "b::B1"}, - {"template<typename T> struct Foo { a::A2 a2; };", - "template<typename T> struct Foo { b::B2 a2; };", "a::A2", "b::B2"}, - {"template<typename T> int f() { return 1; } template<> int f<a::A1>() " - "{ return 2; } int g() { return f<a::A1>(); }", - "template<typename T> int f() { return 1; } template<> int f<b::B1>() " - "{ return 2; } int g() { return f<b::B1>(); }", - "a::A1", "b::B1"}, - {"template<typename T> int f() { return 1; } template<> int f<a::A2>() " - "{ return 2; } int g() { return f<a::A2>(); }", - "template<typename T> int f() { return 1; } template<> int 
f<b::B2>() " - "{ return 2; } int g() { return f<b::B2>(); }", - "a::A2", "b::B2"}, - {"struct Foo { template <typename T> T foo(); }; void g() { Foo f; " - "f.foo<a::A1>(); }", - "struct Foo { template <typename T> T foo(); }; void g() { Foo f; " - "f.foo<b::B1>(); }", - "a::A1", "b::B1"}, - {"struct Foo { template <typename T> T foo(); }; void g() { Foo f; " - "f.foo<a::A2>(); }", - "struct Foo { template <typename T> T foo(); }; void g() { Foo f; " - "f.foo<b::B2>(); }", - "a::A2", "b::B2"}, - })) ); - -TEST_P(RenameEnumTest, RenameEnums) { - auto Param = GetParam(); - assert(!Param.OldName.empty()); - assert(!Param.NewName.empty()); - std::string Actual = - runClangRenameOnCode(Param.Before, Param.OldName, Param.NewName); - CompareSnippets(Param.After, Actual); -} - -TEST_F(RenameEnumTest, RenameEnumDecl) { - std::string Before = R"( - namespace ns { - enum Old1 { Blue }; - } - )"; - std::string Expected = R"( - namespace ns { - enum New1 { Blue }; - } - )"; - std::string After = runClangRenameOnCode(Before, "ns::Old1", "ns::New1"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameEnumTest, RenameScopedEnumDecl) { - std::string Before = R"( - namespace ns { - enum class Old1 { Blue }; - } - )"; - std::string Expected = R"( - namespace ns { - enum class New1 { Blue }; - } - )"; - std::string After = runClangRenameOnCode(Before, "ns::Old1", "ns::New1"); - CompareSnippets(Expected, After); -} - -} // anonymous namespace -} // namespace test -} // namespace clang_rename -} // namesdpace clang diff --git a/clang/unittests/Rename/RenameFunctionTest.cpp b/clang/unittests/Rename/RenameFunctionTest.cpp deleted file mode 100644 index 1c9b112232eb..000000000000 --- a/clang/unittests/Rename/RenameFunctionTest.cpp +++ /dev/null @@ -1,573 +0,0 @@ -//===-- RenameFunctionTest.cpp - unit tests for renaming functions --------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "ClangRenameTest.h" - -namespace clang { -namespace clang_rename { -namespace test { -namespace { - -class RenameFunctionTest : public ClangRenameTest { -public: - RenameFunctionTest() { - AppendToHeader(R"( - struct A { - static bool Foo(); - static bool Spam(); - }; - struct B { - static void Same(); - static bool Foo(); - static int Eric(int x); - }; - void Same(int x); - int Eric(int x); - namespace base { - void Same(); - void ToNanoSeconds(); - void ToInt64NanoSeconds(); - })"); - } -}; - -TEST_F(RenameFunctionTest, RefactorsAFoo) { - std::string Before = R"( - void f() { - A::Foo(); - ::A::Foo(); - })"; - std::string Expected = R"( - void f() { - A::Bar(); - ::A::Bar(); - })"; - - std::string After = runClangRenameOnCode(Before, "A::Foo", "A::Bar"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameFunctionTest, RefactorsNonCallingAFoo) { - std::string Before = R"( - bool g(bool (*func)()) { - return func(); - } - void f() { - auto *ref1 = A::Foo; - auto *ref2 = ::A::Foo; - g(A::Foo); - })"; - std::string Expected = R"( - bool g(bool (*func)()) { - return func(); - } - void f() { - auto *ref1 = A::Bar; - auto *ref2 = ::A::Bar; - g(A::Bar); - })"; - std::string After = runClangRenameOnCode(Before, "A::Foo", "A::Bar"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameFunctionTest, RefactorsEric) { - std::string Before = R"( - void f() { - if (Eric(3)==4) ::Eric(2); - })"; - std::string Expected = R"( - void f() { - if (Larry(3)==4) ::Larry(2); - })"; - std::string After = runClangRenameOnCode(Before, "Eric", "Larry"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameFunctionTest, RefactorsNonCallingEric) { - std::string Before = R"( - int g(int (*func)(int)) { - return func(1); - } - void f() { - auto *ref = ::Eric; - g(Eric); 
- })"; - std::string Expected = R"( - int g(int (*func)(int)) { - return func(1); - } - void f() { - auto *ref = ::Larry; - g(Larry); - })"; - std::string After = runClangRenameOnCode(Before, "Eric", "Larry"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameFunctionTest, DoesNotRefactorBFoo) { - std::string Before = R"( - void f() { - B::Foo(); - })"; - std::string After = runClangRenameOnCode(Before, "A::Foo", "A::Bar"); - CompareSnippets(Before, After); -} - -TEST_F(RenameFunctionTest, DoesNotRefactorBEric) { - std::string Before = R"( - void f() { - B::Eric(2); - })"; - std::string After = runClangRenameOnCode(Before, "Eric", "Larry"); - CompareSnippets(Before, After); -} - -TEST_F(RenameFunctionTest, DoesNotRefactorCEric) { - std::string Before = R"( - namespace C { int Eric(int x); } - void f() { - if (C::Eric(3)==4) ::C::Eric(2); - })"; - std::string Expected = R"( - namespace C { int Eric(int x); } - void f() { - if (C::Eric(3)==4) ::C::Eric(2); - })"; - std::string After = runClangRenameOnCode(Before, "Eric", "Larry"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameFunctionTest, DoesNotRefactorEricInNamespaceC) { - std::string Before = R"( - namespace C { - int Eric(int x); - void f() { - if (Eric(3)==4) Eric(2); - } - } // namespace C)"; - std::string After = runClangRenameOnCode(Before, "Eric", "Larry"); - CompareSnippets(Before, After); -} - -TEST_F(RenameFunctionTest, NamespaceQualified) { - std::string Before = R"( - void f() { - base::ToNanoSeconds(); - ::base::ToNanoSeconds(); - } - void g() { - using base::ToNanoSeconds; - base::ToNanoSeconds(); - ::base::ToNanoSeconds(); - ToNanoSeconds(); - } - namespace foo { - namespace base { - void ToNanoSeconds(); - void f() { - base::ToNanoSeconds(); - } - } - void f() { - ::base::ToNanoSeconds(); - } - })"; - std::string Expected = R"( - void f() { - base::ToInt64NanoSeconds(); - ::base::ToInt64NanoSeconds(); - } - void g() { - using base::ToInt64NanoSeconds; - base::ToInt64NanoSeconds(); - 
::base::ToInt64NanoSeconds(); - base::ToInt64NanoSeconds(); - } - namespace foo { - namespace base { - void ToNanoSeconds(); - void f() { - base::ToNanoSeconds(); - } - } - void f() { - ::base::ToInt64NanoSeconds(); - } - })"; - std::string After = runClangRenameOnCode(Before, "base::ToNanoSeconds", - "base::ToInt64NanoSeconds"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameFunctionTest, RenameFunctionDecls) { - std::string Before = R"( - namespace na { - void X(); - void X() {} - })"; - std::string Expected = R"( - namespace na { - void Y(); - void Y() {} - })"; - std::string After = runClangRenameOnCode(Before, "na::X", "na::Y"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameFunctionTest, RenameTemplateFunctions) { - std::string Before = R"( - namespace na { - template<typename T> T X(); - } - namespace na { void f() { X<int>(); } } - namespace nb { void g() { na::X <int>(); } } - )"; - std::string Expected = R"( - namespace na { - template<typename T> T Y(); - } - namespace na { void f() { nb::Y<int>(); } } - namespace nb { void g() { Y<int>(); } } - )"; - std::string After = runClangRenameOnCode(Before, "na::X", "nb::Y"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameFunctionTest, RenameOutOfLineFunctionDecls) { - std::string Before = R"( - namespace na { - void X(); - } - void na::X() {} - )"; - std::string Expected = R"( - namespace na { - void Y(); - } - void na::Y() {} - )"; - std::string After = runClangRenameOnCode(Before, "na::X", "na::Y"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameFunctionTest, NewNamespaceWithoutLeadingDotDot) { - std::string Before = R"( - namespace old_ns { - void X(); - void X() {} - } - // Assume that the reference is in another file. - void f() { old_ns::X(); } - namespace old_ns { void g() { X(); } } - namespace new_ns { void h() { ::old_ns::X(); } } - )"; - std::string Expected = R"( - namespace old_ns { - void Y(); - void Y() {} - } - // Assume that the reference is in another file. 
- void f() { new_ns::Y(); } - namespace old_ns { void g() { new_ns::Y(); } } - namespace new_ns { void h() { Y(); } } - )"; - std::string After = runClangRenameOnCode(Before, "::old_ns::X", "new_ns::Y"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameFunctionTest, NewNamespaceWithLeadingDotDot) { - std::string Before = R"( - namespace old_ns { - void X(); - void X() {} - } - // Assume that the reference is in another file. - void f() { old_ns::X(); } - namespace old_ns { void g() { X(); } } - namespace new_ns { void h() { ::old_ns::X(); } } - )"; - std::string Expected = R"( - namespace old_ns { - void Y(); - void Y() {} - } - // Assume that the reference is in another file. - void f() { ::new_ns::Y(); } - namespace old_ns { void g() { ::new_ns::Y(); } } - namespace new_ns { void h() { Y(); } } - )"; - std::string After = - runClangRenameOnCode(Before, "::old_ns::X", "::new_ns::Y"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameFunctionTest, DontRenameSymbolsDefinedInAnonymousNamespace) { - std::string Before = R"( - namespace old_ns { - class X {}; - namespace { - void X(); - void X() {} - void f() { X(); } - } - } - )"; - std::string Expected = R"( - namespace old_ns { - class Y {}; - namespace { - void X(); - void X() {} - void f() { X(); } - } - } - )"; - std::string After = - runClangRenameOnCode(Before, "::old_ns::X", "::old_ns::Y"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameFunctionTest, NewNestedNamespace) { - std::string Before = R"( - namespace old_ns { - void X(); - void X() {} - } - // Assume that the reference is in another file. - namespace old_ns { - void f() { X(); } - } - )"; - std::string Expected = R"( - namespace old_ns { - void X(); - void X() {} - } - // Assume that the reference is in another file. 
- namespace old_ns { - void f() { older_ns::X(); } - } - )"; - std::string After = - runClangRenameOnCode(Before, "::old_ns::X", "::old_ns::older_ns::X"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameFunctionTest, MoveFromGlobalToNamespaceWithoutLeadingDotDot) { - std::string Before = R"( - void X(); - void X() {} - - // Assume that the reference is in another file. - namespace some_ns { - void f() { X(); } - } - )"; - std::string Expected = R"( - void X(); - void X() {} - - // Assume that the reference is in another file. - namespace some_ns { - void f() { ns::X(); } - } - )"; - std::string After = - runClangRenameOnCode(Before, "::X", "ns::X"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameFunctionTest, MoveFromGlobalToNamespaceWithLeadingDotDot) { - std::string Before = R"( - void Y() {} - - // Assume that the reference is in another file. - namespace some_ns { - void f() { Y(); } - } - )"; - std::string Expected = R"( - void Y() {} - - // Assume that the reference is in another file. - namespace some_ns { - void f() { ::ns::Y(); } - } - )"; - std::string After = - runClangRenameOnCode(Before, "::Y", "::ns::Y"); - CompareSnippets(Expected, After); -} - -// FIXME: the rename of overloaded operator is not fully supported yet. -TEST_F(RenameFunctionTest, DISABLED_DoNotRenameOverloadedOperatorCalls) { - std::string Before = R"( - namespace old_ns { - class T { public: int x; }; - bool operator==(const T& lhs, const T& rhs) { - return lhs.x == rhs.x; - } - } // namespace old_ns - - // Assume that the reference is in another file. - bool f() { - auto eq = old_ns::operator==; - old_ns::T t1, t2; - old_ns::operator==(t1, t2); - return t1 == t2; - } - )"; - std::string Expected = R"( - namespace old_ns { - class T { public: int x; }; - bool operator==(const T& lhs, const T& rhs) { - return lhs.x == rhs.x; - } - } // namespace old_ns - - // Assume that the reference is in another file. 
- bool f() { - auto eq = new_ns::operator==; - old_ns::T t1, t2; - new_ns::operator==(t1, t2); - return t1 == t2; - } - )"; - std::string After = - runClangRenameOnCode(Before, "old_ns::operator==", "new_ns::operator=="); - CompareSnippets(Expected, After); -} - -TEST_F(RenameFunctionTest, FunctionRefAsTemplate) { - std::string Before = R"( - void X(); - - // Assume that the reference is in another file. - namespace some_ns { - template <void (*Func)(void)> - class TIterator {}; - - template <void (*Func)(void)> - class T { - public: - typedef TIterator<Func> IterType; - using TI = TIterator<Func>; - void g() { - Func(); - auto func = Func; - TIterator<Func> iter; - } - }; - - - void f() { T<X> tx; tx.g(); } - } // namespace some_ns - )"; - std::string Expected = R"( - void X(); - - // Assume that the reference is in another file. - namespace some_ns { - template <void (*Func)(void)> - class TIterator {}; - - template <void (*Func)(void)> - class T { - public: - typedef TIterator<Func> IterType; - using TI = TIterator<Func>; - void g() { - Func(); - auto func = Func; - TIterator<Func> iter; - } - }; - - - void f() { T<ns::X> tx; tx.g(); } - } // namespace some_ns - )"; - std::string After = runClangRenameOnCode(Before, "::X", "ns::X"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameFunctionTest, RenameFunctionInUsingDecl) { - std::string Before = R"( - using base::ToNanoSeconds; - namespace old_ns { - using base::ToNanoSeconds; - void f() { - using base::ToNanoSeconds; - } - } - )"; - std::string Expected = R"( - using base::ToInt64NanoSeconds; - namespace old_ns { - using base::ToInt64NanoSeconds; - void f() { - using base::ToInt64NanoSeconds; - } - } - )"; - std::string After = runClangRenameOnCode(Before, "base::ToNanoSeconds", - "base::ToInt64NanoSeconds"); - CompareSnippets(Expected, After); -} - -// FIXME: Fix the complex the case where the symbol being renamed is located in -// `std::function<decltype<renamed_symbol>>`. 
-TEST_F(ClangRenameTest, DISABLED_ReferencesInLambdaFunctionParameters) { - std::string Before = R"( - template <class T> - class function; - template <class R, class... ArgTypes> - class function<R(ArgTypes...)> { - public: - template <typename Functor> - function(Functor f) {} - - function() {} - - R operator()(ArgTypes...) const {} - }; - - namespace ns { - void Old() {} - void f() { - function<decltype(Old)> func; - } - } // namespace ns)"; - std::string Expected = R"( - template <class T> - class function; - template <class R, class... ArgTypes> - class function<R(ArgTypes...)> { - public: - template <typename Functor> - function(Functor f) {} - - function() {} - - R operator()(ArgTypes...) const {} - }; - - namespace ns { - void New() {} - void f() { - function<decltype(::new_ns::New)> func; - } - } // namespace ns)"; - std::string After = runClangRenameOnCode(Before, "ns::Old", "::new_ns::New"); - CompareSnippets(Expected, After); -} - -} // anonymous namespace -} // namespace test -} // namespace clang_rename -} // namesdpace clang diff --git a/clang/unittests/Rename/RenameMemberTest.cpp b/clang/unittests/Rename/RenameMemberTest.cpp deleted file mode 100644 index c16d16aa25f0..000000000000 --- a/clang/unittests/Rename/RenameMemberTest.cpp +++ /dev/null @@ -1,228 +0,0 @@ -//===-- ClangMemberTests.cpp - unit tests for renaming class members ------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "ClangRenameTest.h" - -namespace clang { -namespace clang_rename { -namespace test { -namespace { - -class RenameMemberTest : public ClangRenameTest { -public: - RenameMemberTest() { - AppendToHeader(R"( - struct NA { - void Foo(); - void NotFoo(); - static void SFoo(); - static void SNotFoo(); - int Moo; - }; - struct A { - virtual void Foo(); - void NotFoo(); - static void SFoo(); - static void SNotFoo(); - int Moo; - int NotMoo; - static int SMoo; - }; - struct B : public A { - void Foo() override; - }; - template <typename T> struct TA { - T* Foo(); - T* NotFoo(); - static T* SFoo(); - static T* NotSFoo(); - }; - template <typename T> struct TB : public TA<T> {}; - namespace ns { - template <typename T> struct TA { - T* Foo(); - T* NotFoo(); - static T* SFoo(); - static T* NotSFoo(); - static int SMoo; - }; - template <typename T> struct TB : public TA<T> {}; - struct A { - void Foo(); - void NotFoo(); - static void SFoo(); - static void SNotFoo(); - }; - struct B : public A {}; - struct C { - template <class T> - void SFoo(const T& t) {} - template <class T> - void Foo() {} - }; - })"); - } -}; - -INSTANTIATE_TEST_SUITE_P( - DISABLED_RenameTemplatedClassStaticVariableTest, RenameMemberTest, - testing::ValuesIn(std::vector<Case>({ - // FIXME: support renaming static variables for template classes. - {"void f() { ns::TA<int>::SMoo; }", - "void f() { ns::TA<int>::SMeh; }", "ns::TA::SMoo", "ns::TA::SMeh"}, - })) ); - -INSTANTIATE_TEST_SUITE_P( - RenameMemberTest, RenameMemberTest, - testing::ValuesIn(std::vector<Case>({ - // Normal methods and fields. 
- {"void f() { A a; a.Foo(); }", "void f() { A a; a.Bar(); }", "A::Foo", - "A::Bar"}, - {"void f() { ns::A a; a.Foo(); }", "void f() { ns::A a; a.Bar(); }", - "ns::A::Foo", "ns::A::Bar"}, - {"void f() { A a; int x = a.Moo; }", "void f() { A a; int x = a.Meh; }", - "A::Moo", "A::Meh"}, - {"void f() { B b; b.Foo(); }", "void f() { B b; b.Bar(); }", "B::Foo", - "B::Bar"}, - {"void f() { ns::B b; b.Foo(); }", "void f() { ns::B b; b.Bar(); }", - "ns::A::Foo", "ns::A::Bar"}, - {"void f() { B b; int x = b.Moo; }", "void f() { B b; int x = b.Meh; }", - "A::Moo", "A::Meh"}, - - // Static methods. - {"void f() { A::SFoo(); }", "void f() { A::SBar(); }", "A::SFoo", - "A::SBar"}, - {"void f() { ns::A::SFoo(); }", "void f() { ns::A::SBar(); }", - "ns::A::SFoo", "ns::A::SBar"}, - {"void f() { TA<int>::SFoo(); }", "void f() { TA<int>::SBar(); }", - "TA::SFoo", "TA::SBar"}, - {"void f() { ns::TA<int>::SFoo(); }", - "void f() { ns::TA<int>::SBar(); }", "ns::TA::SFoo", "ns::TA::SBar"}, - - // Static variables. - {"void f() { A::SMoo; }", - "void f() { A::SMeh; }", "A::SMoo", "A::SMeh"}, - - // Templated methods. - {"void f() { TA<int> a; a.Foo(); }", "void f() { TA<int> a; a.Bar(); }", - "TA::Foo", "TA::Bar"}, - {"void f() { ns::TA<int> a; a.Foo(); }", - "void f() { ns::TA<int> a; a.Bar(); }", "ns::TA::Foo", "ns::TA::Bar"}, - {"void f() { TB<int> b; b.Foo(); }", "void f() { TB<int> b; b.Bar(); }", - "TA::Foo", "TA::Bar"}, - {"void f() { ns::TB<int> b; b.Foo(); }", - "void f() { ns::TB<int> b; b.Bar(); }", "ns::TA::Foo", "ns::TA::Bar"}, - {"void f() { ns::C c; int x; c.SFoo(x); }", - "void f() { ns::C c; int x; c.SBar(x); }", "ns::C::SFoo", - "ns::C::SBar"}, - {"void f() { ns::C c; c.Foo<int>(); }", - "void f() { ns::C c; c.Bar<int>(); }", "ns::C::Foo", "ns::C::Bar"}, - - // Pointers to methods. 
- {"void f() { auto p = &A::Foo; }", "void f() { auto p = &A::Bar; }", - "A::Foo", "A::Bar"}, - {"void f() { auto p = &A::SFoo; }", "void f() { auto p = &A::SBar; }", - "A::SFoo", "A::SBar"}, - {"void f() { auto p = &B::Foo; }", "void f() { auto p = &B::Bar; }", - "B::Foo", "B::Bar"}, - {"void f() { auto p = &ns::A::Foo; }", - "void f() { auto p = &ns::A::Bar; }", "ns::A::Foo", "ns::A::Bar"}, - {"void f() { auto p = &ns::A::SFoo; }", - "void f() { auto p = &ns::A::SBar; }", "ns::A::SFoo", "ns::A::SBar"}, - {"void f() { auto p = &ns::C::SFoo<int>; }", - "void f() { auto p = &ns::C::SBar<int>; }", "ns::C::SFoo", - "ns::C::SBar"}, - - // These methods are not declared or overridden in the subclass B, we - // have to use the qualified name with parent class A to identify them. - {"void f() { auto p = &ns::B::Foo; }", - "void f() { auto p = &ns::B::Bar; }", "ns::A::Foo", "ns::B::Bar"}, - {"void f() { B::SFoo(); }", "void f() { B::SBar(); }", "A::SFoo", - "B::SBar"}, - {"void f() { ns::B::SFoo(); }", "void f() { ns::B::SBar(); }", - "ns::A::SFoo", "ns::B::SBar"}, - {"void f() { auto p = &B::SFoo; }", "void f() { auto p = &B::SBar; }", - "A::SFoo", "B::SBar"}, - {"void f() { auto p = &ns::B::SFoo; }", - "void f() { auto p = &ns::B::SBar; }", "ns::A::SFoo", "ns::B::SBar"}, - {"void f() { TB<int>::SFoo(); }", "void f() { TB<int>::SBar(); }", - "TA::SFoo", "TB::SBar"}, - {"void f() { ns::TB<int>::SFoo(); }", - "void f() { ns::TB<int>::SBar(); }", "ns::TA::SFoo", "ns::TB::SBar"}, - })) ); - -TEST_P(RenameMemberTest, RenameMembers) { - auto Param = GetParam(); - assert(!Param.OldName.empty()); - assert(!Param.NewName.empty()); - std::string Actual = - runClangRenameOnCode(Param.Before, Param.OldName, Param.NewName); - CompareSnippets(Param.After, Actual); -} - -TEST_F(RenameMemberTest, RenameMemberInsideClassMethods) { - std::string Before = R"( - struct X { - int Moo; - void Baz() { Moo = 1; } - };)"; - std::string Expected = R"( - struct X { - int Meh; - void Baz() { Meh = 
1; } - };)"; - std::string After = runClangRenameOnCode(Before, "X::Moo", "Y::Meh"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameMemberTest, RenameMethodInsideClassMethods) { - std::string Before = R"( - struct X { - void Foo() {} - void Baz() { Foo(); } - };)"; - std::string Expected = R"( - struct X { - void Bar() {} - void Baz() { Bar(); } - };)"; - std::string After = runClangRenameOnCode(Before, "X::Foo", "X::Bar"); - CompareSnippets(Expected, After); -} - -TEST_F(RenameMemberTest, RenameCtorInitializer) { - std::string Before = R"( - class X { - public: - X(); - A a; - A a2; - B b; - }; - - X::X():a(), b() {} - )"; - std::string Expected = R"( - class X { - public: - X(); - A bar; - A a2; - B b; - }; - - X::X():bar(), b() {} - )"; - std::string After = runClangRenameOnCode(Before, "X::a", "X::bar"); - CompareSnippets(Expected, After); -} - -} // anonymous namespace -} // namespace test -} // namespace clang_rename -} // namesdpace clang diff --git a/clang/utils/UpdateVerifyTests/core.py b/clang/utils/UpdateVerifyTests/core.py deleted file mode 100644 index d1350cdbb698..000000000000 --- a/clang/utils/UpdateVerifyTests/core.py +++ /dev/null @@ -1,452 +0,0 @@ -import sys -import re - -DEBUG = False - - -def dprint(*args): - if DEBUG: - print(*args, file=sys.stderr) - - -class KnownException(Exception): - pass - - -def parse_error_category(s, prefix): - if "no expected directives found" in s: - return None - parts = s.split("diagnostics") - diag_category = parts[0] - category_parts = parts[0].strip().strip("'").split("-") - expected = category_parts[0] - if expected != prefix: - raise Exception( - f"expected prefix '{prefix}', but found '{expected}'. Multiple verify prefixes are not supported." 
- ) - diag_category = category_parts[1] - if "seen but not expected" in parts[1]: - seen = True - elif "expected but not seen" in parts[1]: - seen = False - else: - raise KnownException(f"unexpected category '{parts[1]}'") - return (diag_category, seen) - - -diag_error_re = re.compile(r"File (\S+) Line (\d+): (.+)") -diag_error_re2 = re.compile(r"File \S+ Line \d+ \(directive at (\S+):(\d+)\): (.+)") - - -def parse_diag_error(s): - m = diag_error_re2.match(s) - if not m: - m = diag_error_re.match(s) - if not m: - return None - return (m.group(1), int(m.group(2)), m.group(3)) - - -class Line: - def __init__(self, content, line_n): - self.content = content - self.diag = None - self.line_n = line_n - self.targeting_diags = [] - - def update_line_n(self, n): - self.line_n = n - - def render(self): - if not self.diag: - return self.content - assert "{{DIAG}}" in self.content - res = self.content.replace("{{DIAG}}", self.diag.render()) - if not res.strip(): - return "" - return res - - -class Diag: - def __init__( - self, - prefix, - diag_content, - category, - parsed_target_line_n, - line_is_absolute, - count, - line, - is_re, - whitespace_strings, - is_from_source_file, - ): - self.prefix = prefix - self.diag_content = diag_content - self.category = category - self.parsed_target_line_n = parsed_target_line_n - self.line_is_absolute = line_is_absolute - self.count = count - self.line = line - self.target = None - self.is_re = is_re - self.absolute_target() - self.whitespace_strings = whitespace_strings - self.is_from_source_file = is_from_source_file - - def decrement_count(self): - self.count -= 1 - assert self.count >= 0 - - def increment_count(self): - assert self.count >= 0 - self.count += 1 - - def unset_target(self): - assert self.target is not None - self.target.targeting_diags.remove(self) - self.target = None - - def set_target(self, target): - if self.target: - self.unset_target() - self.target = target - self.target.targeting_diags.append(self) - - def 
absolute_target(self): - if self.target: - return self.target.line_n - if self.line_is_absolute: - return self.parsed_target_line_n - return self.line.line_n + self.parsed_target_line_n - - def relative_target(self): - return self.absolute_target() - self.line.line_n - - def take(self, other_diag): - assert self.count == 0 - assert other_diag.count > 0 - assert other_diag.target == self.target - assert not other_diag.line_is_absolute - assert not other_diag.is_re and not self.is_re - self.line_is_absolute = False - self.diag_content = other_diag.diag_content - self.count = other_diag.count - self.category = other_diag.category - self.count = other_diag.count - other_diag.count = 0 - - def render(self): - assert self.count >= 0 - if self.count == 0: - return "" - line_location_s = "" - if self.relative_target() != 0: - if self.line_is_absolute: - line_location_s = f"@{self.absolute_target()}" - elif self.relative_target() > 0: - line_location_s = f"@+{self.relative_target()}" - else: - line_location_s = ( - f"@{self.relative_target()}" # the minus sign is implicit - ) - count_s = "" if self.count == 1 else f"{self.count}" - re_s = "-re" if self.is_re else "" - if self.whitespace_strings: - whitespace1_s = self.whitespace_strings[0] - whitespace2_s = self.whitespace_strings[1] - whitespace3_s = self.whitespace_strings[2] - else: - whitespace1_s = " " - whitespace2_s = "" - whitespace3_s = "" - if count_s and not whitespace2_s: - whitespace2_s = " " # required to parse correctly - elif not count_s and whitespace2_s == " ": - """Don't emit a weird extra space. - However if the whitespace is something other than the - standard single space, let it be to avoid disrupting manual formatting. - The existence of a non-empty whitespace2_s implies this was parsed with - a count > 1 and then decremented, otherwise this whitespace would have - been parsed as whitespace3_s. 
- """ - whitespace2_s = "" - return f"//{whitespace1_s}{self.prefix}-{self.category}{re_s}{line_location_s}{whitespace2_s}{count_s}{whitespace3_s}{{{{{self.diag_content}}}}}" - - -expected_diag_re = re.compile( - r"//(\s*)([a-zA-Z]+)-(note|warning|error)(-re)?(@[+-]?\d+)?(?:(\s*)(\d+))?(\s*)\{\{(.*)\}\}" -) - - -def parse_diag(line, filename, lines, prefix): - s = line.content - ms = expected_diag_re.findall(s) - if not ms: - return None - if len(ms) > 1: - raise KnownException( - f"multiple diags on line {filename}:{line.line_n}. Aborting due to missing implementation." - ) - [ - whitespace1_s, - check_prefix, - category_s, - re_s, - target_line_s, - whitespace2_s, - count_s, - whitespace3_s, - diag_s, - ] = ms[0] - if check_prefix != prefix: - return None - if not target_line_s: - target_line_n = 0 - is_absolute = False - elif target_line_s.startswith("@+"): - target_line_n = int(target_line_s[2:]) - is_absolute = False - elif target_line_s.startswith("@-"): - target_line_n = int(target_line_s[1:]) - is_absolute = False - else: - target_line_n = int(target_line_s[1:]) - is_absolute = True - count = int(count_s) if count_s else 1 - line.content = expected_diag_re.sub("{{DIAG}}", s) - - return Diag( - prefix, - diag_s, - category_s, - target_line_n, - is_absolute, - count, - line, - bool(re_s), - [whitespace1_s, whitespace2_s, whitespace3_s], - True, - ) - - -def add_line(new_line, lines): - lines.insert(new_line.line_n - 1, new_line) - for i in range(new_line.line_n, len(lines)): - line = lines[i] - assert line.line_n == i - line.update_line_n(i + 1) - assert all(line.line_n == i + 1 for i, line in enumerate(lines)) - - -def remove_line(old_line, lines): - lines.remove(old_line) - for i in range(old_line.line_n - 1, len(lines)): - line = lines[i] - assert line.line_n == i + 2 - line.update_line_n(i + 1) - assert all(line.line_n == i + 1 for i, line in enumerate(lines)) - - -indent_re = re.compile(r"\s*") - - -def get_indent(s): - return indent_re.match(s).group(0) 
- - -def orig_line_n_to_new_line_n(line_n, orig_lines): - return orig_lines[line_n - 1].line_n - - -def add_diag(orig_line_n, diag_s, diag_category, lines, orig_lines, prefix): - line_n = orig_line_n_to_new_line_n(orig_line_n, orig_lines) - target = lines[line_n - 1] - for other in target.targeting_diags: - if other.is_re: - raise KnownException( - "mismatching diag on line with regex matcher. Skipping due to missing implementation" - ) - reverse = ( - True - if [other for other in target.targeting_diags if other.relative_target() < 0] - else False - ) - - targeting = [ - other for other in target.targeting_diags if not other.line_is_absolute - ] - targeting.sort(reverse=reverse, key=lambda d: d.relative_target()) - prev_offset = 0 - prev_line = target - direction = -1 if reverse else 1 - for d in targeting: - if d.relative_target() != prev_offset + direction: - break - prev_offset = d.relative_target() - prev_line = d.line - total_offset = prev_offset - 1 if reverse else prev_offset + 1 - if reverse: - new_line_n = prev_line.line_n + 1 - else: - new_line_n = prev_line.line_n - assert new_line_n == line_n + (not reverse) - total_offset - - new_line = Line(get_indent(prev_line.content) + "{{DIAG}}\n", new_line_n) - add_line(new_line, lines) - - whitespace_strings = prev_line.diag.whitespace_strings if prev_line.diag else None - new_diag = Diag( - prefix, - diag_s, - diag_category, - total_offset, - False, - 1, - new_line, - False, - whitespace_strings, - False, - ) - new_line.diag = new_diag - new_diag.set_target(target) - - -def remove_dead_diags(lines): - for line in lines: - if not line.diag or line.diag.count != 0: - continue - if line.render() == "": - remove_line(line, lines) - else: - assert line.diag.is_from_source_file - for other_diag in line.targeting_diags: - if ( - other_diag.is_from_source_file - or other_diag.count == 0 - or other_diag.category != line.diag.category - ): - continue - if other_diag.is_re or line.diag.is_re: - continue - 
line.diag.take(other_diag) - remove_line(other_diag.line, lines) - - -def has_live_diags(lines): - for line in lines: - if line.diag and line.diag.count > 0: - return True - return False - - -def get_expected_no_diags_line_n(lines, prefix): - for line in lines: - if f"{prefix}-no-diagnostics" in line.content: - return line.line_n - return None - - -def update_test_file(filename, diag_errors, prefix, updated_test_files): - dprint(f"updating test file {filename}") - if filename in updated_test_files: - raise KnownException(f"{filename} already updated, but got new output") - else: - updated_test_files.add(filename) - with open(filename, "r") as f: - lines = [Line(line, i + 1) for i, line in enumerate(f.readlines())] - orig_lines = list(lines) - expected_no_diags_line_n = get_expected_no_diags_line_n(orig_lines, prefix) - - for line in lines: - diag = parse_diag(line, filename, lines, prefix) - if diag: - line.diag = diag - diag.set_target(lines[diag.absolute_target() - 1]) - - for line_n, diag_s, diag_category, seen in diag_errors: - if seen: - continue - # this is a diagnostic expected but not seen - assert lines[line_n - 1].diag - if diag_s != lines[line_n - 1].diag.diag_content: - raise KnownException( - f"{filename}:{line_n} - found diag {lines[line_n - 1].diag.diag_content} but expected {diag_s}" - ) - if diag_category != lines[line_n - 1].diag.category: - raise KnownException( - f"{filename}:{line_n} - found {lines[line_n - 1].diag.category} diag but expected {diag_category}" - ) - lines[line_n - 1].diag.decrement_count() - diag_errors_left = [] - diag_errors.sort(reverse=True, key=lambda t: t[0]) - for line_n, diag_s, diag_category, seen in diag_errors: - if not seen: - continue - target = orig_lines[line_n - 1] - other_diags = [ - d - for d in target.targeting_diags - if d.diag_content == diag_s and d.category == diag_category - ] - other_diag = other_diags[0] if other_diags else None - if other_diag: - other_diag.increment_count() - else: - add_diag(line_n, 
diag_s, diag_category, lines, orig_lines, prefix) - remove_dead_diags(lines) - has_diags = has_live_diags(lines) - with open(filename, "w") as f: - if not has_diags and expected_no_diags_line_n is None: - f.write("// expected-no-diagnostics\n") - for line in lines: - if has_diags and line.line_n == expected_no_diags_line_n: - continue - f.write(line.render()) - - -def update_test_files(errors, prefix): - errors_by_file = {} - for (filename, line, diag_s), (diag_category, seen) in errors: - if filename not in errors_by_file: - errors_by_file[filename] = [] - errors_by_file[filename].append((line, diag_s, diag_category, seen)) - updated_test_files = set() - for filename, diag_errors in errors_by_file.items(): - try: - update_test_file(filename, diag_errors, prefix, updated_test_files) - except KnownException as e: - return f"Error in update-verify-tests while updating {filename}: {e}" - updated_files = list(updated_test_files) - assert updated_files - if len(updated_files) == 1: - return f"updated file {updated_files[0]}" - updated_files_s = "\n\t".join(updated_files) - return "updated files:\n\t{updated_files_s}" - - -def check_expectations(tool_output, prefix): - """ - The entry point function. - Called by the stand-alone update-verify-tests.py as well as litplugin.py. 
- """ - curr = [] - curr_category = None - try: - for line in tool_output: - if line.startswith("error: "): - curr_category = parse_error_category(line[len("error: ") :], prefix) - continue - - diag_error = parse_diag_error(line.strip()) - if diag_error: - curr.append((diag_error, curr_category)) - else: - dprint("no match") - dprint(line.strip()) - except KnownException as e: - return f"Error in update-verify-tests while parsing tool output: {e}" - if curr: - return update_test_files(curr, prefix) - else: - return "no mismatching diagnostics found" diff --git a/clang/utils/update-verify-tests.py b/clang/utils/update-verify-tests.py deleted file mode 100644 index e2874a8c049e..000000000000 --- a/clang/utils/update-verify-tests.py +++ /dev/null @@ -1,38 +0,0 @@ -import sys -import argparse -from UpdateVerifyTests.core import check_expectations - -""" - Pipe output from clang's -verify into this script to have the test case updated to expect the actual diagnostic output. - When inserting new expected-* checks it will place them on the line before the location of the diagnostic, with an @+1, - or @+N for some N if there are multiple diagnostics emitted on the same line. If the current checks are using @-N for - this line, the new check will follow that convention also. - Existing checks will be left untouched as much as possible, including their location and whitespace content, to minimize - diffs. If inaccurate their count will be updated, or the check removed entirely. - - Missing features: - - multiple prefixes on the same line (-verify=my-prefix,my-other-prefix) - - multiple prefixes on separate RUN lines (RUN: -verify=my-prefix\nRUN: -verify my-other-prefix) - - regexes with expected-*-re: existing ones will be left untouched if accurate, but the script will abort if there are any - diagnostic mismatches on the same line. 
- - multiple checks targeting the same line are supported, but a line may only contain one check - - if multiple checks targeting the same line are failing the script is not guaranteed to produce a minimal diff - -Example usage: - clang -verify [file] | python3 update-verify-tests.py - clang -verify=check [file] | python3 update-verify-tests.py --prefix check -""" - - -def main(): - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument( - "--prefix", default="expected", help="The prefix passed to -verify" - ) - args = parser.parse_args() - output = check_expectations(sys.stdin.readlines(), args.prefix) - print(output) - - -if __name__ == "__main__": - main() diff --git a/clang/www/c_status.html b/clang/www/c_status.html index 148405ec31a9..e5da7f3c87a5 100644 --- a/clang/www/c_status.html +++ b/clang/www/c_status.html @@ -697,7 +697,7 @@ conformance.</p> <tr> <td>Enhanced enumerations</td> <td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3030.htm">N3030</a></td> - <td class="unknown" align="center">Unknown</td> + <td class="unreleased" align="center">Clang 20</td> </tr> <tr> <td>Freestanding C and IEC 60559 conformance scope reduction</td> diff --git a/compiler-rt/lib/asan/asan_rtl.cpp b/compiler-rt/lib/asan/asan_rtl.cpp index a390802af28d..19c6c210b564 100644 --- a/compiler-rt/lib/asan/asan_rtl.cpp +++ b/compiler-rt/lib/asan/asan_rtl.cpp @@ -478,9 +478,6 @@ static bool AsanInitInternal() { if (flags()->start_deactivated) AsanDeactivate(); - // interceptors - InitTlsSize(); - // Create main thread. 
AsanThread *main_thread = CreateMainThread(); CHECK_EQ(0, main_thread->tid()); diff --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp index 1972a07d15ac..886e93e5fa81 100644 --- a/compiler-rt/lib/dfsan/dfsan.cpp +++ b/compiler-rt/lib/dfsan/dfsan.cpp @@ -1262,6 +1262,8 @@ static void DFsanInit(int argc, char **argv, char **envp) { CheckASLR(); + InitializePlatformEarly(); + if (!InitShadowWithReExec(dfsan_get_track_origins())) { Printf("FATAL: DataflowSanitizer can not mmap the shadow memory.\n"); DumpProcessMap(); diff --git a/compiler-rt/lib/hwasan/hwasan.cpp b/compiler-rt/lib/hwasan/hwasan.cpp index ccdc0b4bc21b..24384d8b4d2c 100644 --- a/compiler-rt/lib/hwasan/hwasan.cpp +++ b/compiler-rt/lib/hwasan/hwasan.cpp @@ -357,8 +357,6 @@ __attribute__((constructor(0))) void __hwasan_init() { hwasan_init_is_running = 1; SanitizerToolName = "HWAddressSanitizer"; - InitTlsSize(); - CacheBinaryName(); InitializeFlags(); @@ -367,6 +365,8 @@ __attribute__((constructor(0))) void __hwasan_init() { __sanitizer_set_report_path(common_flags()->log_path); + InitializePlatformEarly(); + AndroidTestTlsSlot(); DisableCoreDumperIfNecessary(); diff --git a/compiler-rt/lib/interception/interception_win.cpp b/compiler-rt/lib/interception/interception_win.cpp index a638e66eccee..a0ff124a89c9 100644 --- a/compiler-rt/lib/interception/interception_win.cpp +++ b/compiler-rt/lib/interception/interception_win.cpp @@ -130,6 +130,7 @@ #include "sanitizer_common/sanitizer_platform.h" #define WIN32_LEAN_AND_MEAN #include <windows.h> +#include <psapi.h> namespace __interception { @@ -385,7 +386,30 @@ void TestOnlyReleaseTrampolineRegions() { } } -static uptr AllocateMemoryForTrampoline(uptr image_address, size_t size) { +static uptr AllocateMemoryForTrampoline(uptr func_address, size_t size) { + uptr image_address = func_address; + +#if SANITIZER_WINDOWS64 + // Allocate memory after the module (DLL or EXE file), but within 2GB + // of the start of the module so that any 
address within the module can be + // referenced with PC-relative operands. + // This allows us to not just jump to the trampoline with a PC-relative + // offset, but to relocate any instructions that we copy to the trampoline + // which have references to the original module. If we can't find the base + // address of the module (e.g. if func_address is in mmap'ed memory), just + // use func_address as is. + HMODULE module; + if (::GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | + GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, + (LPCWSTR)func_address, &module)) { + MODULEINFO module_info; + if (::GetModuleInformation(::GetCurrentProcess(), module, + &module_info, sizeof(module_info))) { + image_address = (uptr)module_info.lpBaseOfDll; + } + } +#endif + // Find a region within 2G with enough space to allocate |size| bytes. TrampolineMemoryRegion *region = nullptr; for (size_t bucket = 0; bucket < kMaxTrampolineRegion; ++bucket) { diff --git a/compiler-rt/lib/lsan/lsan.cpp b/compiler-rt/lib/lsan/lsan.cpp index 7a27b600f203..798294b499e2 100644 --- a/compiler-rt/lib/lsan/lsan.cpp +++ b/compiler-rt/lib/lsan/lsan.cpp @@ -92,10 +92,10 @@ extern "C" void __lsan_init() { CacheBinaryName(); AvoidCVE_2016_2143(); InitializeFlags(); + InitializePlatformEarly(); InitCommonLsan(); InitializeAllocator(); ReplaceSystemMalloc(); - InitTlsSize(); InitializeInterceptors(); InitializeThreads(); InstallDeadlySignalHandlers(LsanOnDeadlySignal); diff --git a/compiler-rt/lib/memprof/memprof_rtl.cpp b/compiler-rt/lib/memprof/memprof_rtl.cpp index cf4bde808bfa..2cc6c2df5a6f 100644 --- a/compiler-rt/lib/memprof/memprof_rtl.cpp +++ b/compiler-rt/lib/memprof/memprof_rtl.cpp @@ -213,9 +213,6 @@ static void MemprofInitInternal() { InitializeCoverage(common_flags()->coverage, common_flags()->coverage_dir); - // interceptors - InitTlsSize(); - // Create main thread. 
MemprofThread *main_thread = CreateMainThread(); CHECK_EQ(0, main_thread->tid()); diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp index 2ee05f43ec5e..6c27ab21eeeb 100644 --- a/compiler-rt/lib/msan/msan.cpp +++ b/compiler-rt/lib/msan/msan.cpp @@ -457,10 +457,11 @@ void __msan_init() { __sanitizer_set_report_path(common_flags()->log_path); + InitializePlatformEarly(); + InitializeInterceptors(); InstallAtForkHandler(); CheckASLR(); - InitTlsSize(); InstallDeadlySignalHandlers(MsanOnDeadlySignal); InstallAtExitHandler(); // Needs __cxa_atexit interceptor. diff --git a/compiler-rt/lib/rtsan/CMakeLists.txt b/compiler-rt/lib/rtsan/CMakeLists.txt index 3f146a757a97..07a21b49eb45 100644 --- a/compiler-rt/lib/rtsan/CMakeLists.txt +++ b/compiler-rt/lib/rtsan/CMakeLists.txt @@ -29,6 +29,8 @@ set(RTSAN_LINK_LIBS ${COMPILER_RT_UNWINDER_LINK_LIBS} ${COMPILER_RT_CXX_LINK_LIBS}) +append_rtti_flag(OFF RTSAN_CFLAGS) + if(APPLE) add_compiler_rt_object_libraries(RTRtsan OS ${SANITIZER_COMMON_SUPPORTED_OS} diff --git a/compiler-rt/lib/rtsan/rtsan_context.cpp b/compiler-rt/lib/rtsan/rtsan_context.cpp index 8609394fa222..e69fb259798d 100644 --- a/compiler-rt/lib/rtsan/rtsan_context.cpp +++ b/compiler-rt/lib/rtsan/rtsan_context.cpp @@ -62,7 +62,7 @@ static __rtsan::Context &GetContextForThisThreadImpl() { Until then, and to keep the first PRs small, only the exit mode is available. 
*/ -static void InvokeViolationDetectedAction() { exit(EXIT_FAILURE); } +static void InvokeViolationDetectedAction() { Die(); } __rtsan::Context::Context() = default; diff --git a/compiler-rt/lib/rtsan/rtsan_flags.cpp b/compiler-rt/lib/rtsan/rtsan_flags.cpp index beab2a2fc5d8..9c90d23d7426 100644 --- a/compiler-rt/lib/rtsan/rtsan_flags.cpp +++ b/compiler-rt/lib/rtsan/rtsan_flags.cpp @@ -35,6 +35,7 @@ void __rtsan::InitializeFlags() { { CommonFlags cf; cf.CopyFrom(*common_flags()); + cf.exitcode = 43; cf.external_symbolizer_path = GetEnv("RTSAN_SYMBOLIZER_PATH"); OverrideCommonFlags(cf); } diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_main.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_main.cpp index 255ac9497103..50c726e09f28 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_main.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_main.cpp @@ -8,8 +8,25 @@ // //===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_platform.h" #include "sanitizer_test_utils.h" +// Default RTSAN_OPTIONS for the unit tests. +extern "C" const char *__rtsan_default_options() { +#if SANITIZER_APPLE + // On Darwin, we default to `abort_on_error=1`, which would make tests run + // much slower. Let's override this and run lit tests with 'abort_on_error=0' + // and make sure we do not overwhelm the syslog while testing. Also, let's + // turn symbolization off to speed up testing, especially when not running + // with llvm-symbolizer but with atos. + return "symbolize=false:abort_on_error=0:log_to_syslog=0"; +#else + // Let's turn symbolization off to speed up testing (more than 3 times speedup + // observed). 
+ return "symbolize=false"; +#endif +} + int main(int argc, char **argv) { testing::GTEST_FLAG(death_test_style) = "threadsafe"; testing::InitGoogleTest(&argc, argv); diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_utilities.h b/compiler-rt/lib/rtsan/tests/rtsan_test_utilities.h index 4ba4fc5e5308..f0cf90e057e3 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_utilities.h +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_utilities.h @@ -37,8 +37,8 @@ void ExpectRealtimeDeath(Function &&Func, : ""; }; - EXPECT_EXIT(RealtimeInvoke(std::forward<Function>(Func)), - ExitedWithCode(EXIT_FAILURE), GetExpectedErrorSubstring()); + EXPECT_EXIT(RealtimeInvoke(std::forward<Function>(Func)), ExitedWithCode(43), + GetExpectedErrorSubstring()); } template <typename Function> void ExpectNonRealtimeSurvival(Function &&Func) { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_nolibc.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_common_nolibc.cpp index 7d88575160c6..e49285f22dff 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_nolibc.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_nolibc.cpp @@ -22,6 +22,7 @@ namespace __sanitizer { #if !SANITIZER_WINDOWS # if SANITIZER_LINUX void LogMessageOnPrintf(const char *str) {} +void InitTlsSize() {} # endif void WriteToSyslog(const char *buffer) {} void Abort() { internal__exit(1); } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp index a67b2a8725ec..75dcf546729f 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp @@ -94,7 +94,6 @@ void DisableCoreDumperIfNecessary() {} void InstallDeadlySignalHandlers(SignalHandlerType handler) {} void SetAlternateSignalStack() {} void UnsetAlternateSignalStack() {} -void InitTlsSize() {} bool SignalContext::IsStackOverflow() const { return false; } void SignalContext::DumpAllRegisters(void *context) { 
UNIMPLEMENTED(); } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 6359f4348e3c..1c637d109649 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -2672,9 +2672,7 @@ static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) { void SignalContext::InitPcSpBp() { GetPcSpBp(context, &pc, &sp, &bp); } -void InitializePlatformEarly() { - // Do nothing. -} +void InitializePlatformEarly() { InitTlsSize(); } void CheckASLR() { # if SANITIZER_NETBSD diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp index 4ecd47388c1d..93158b0d41aa 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp @@ -209,7 +209,6 @@ bool SetEnv(const char *name, const char *value) { __attribute__((unused)) static int g_use_dlpi_tls_data; # if SANITIZER_GLIBC && !SANITIZER_GO - static void GetGLibcVersion(int *major, int *minor, int *patch) { const char *p = gnu_get_libc_version(); *major = internal_simple_strtoll(p, &p, 10); @@ -218,23 +217,6 @@ static void GetGLibcVersion(int *major, int *minor, int *patch) { *minor = (*p == '.') ? internal_simple_strtoll(p + 1, &p, 10) : 0; *patch = (*p == '.') ? 
internal_simple_strtoll(p + 1, &p, 10) : 0; } - -__attribute__((unused)) static size_t g_tls_size; - -void InitTlsSize() { - int major, minor, patch; - GetGLibcVersion(&major, &minor, &patch); - g_use_dlpi_tls_data = major == 2 && minor >= 25; - -# if defined(__aarch64__) || defined(__x86_64__) || \ - defined(__powerpc64__) || defined(__loongarch__) - void *get_tls_static_info = dlsym(RTLD_DEFAULT, "_dl_get_tls_static_info"); - size_t tls_align; - ((void (*)(size_t *, size_t *))get_tls_static_info)(&g_tls_size, &tls_align); -# endif -} -# else -void InitTlsSize() {} # endif // SANITIZER_GLIBC && !SANITIZER_GO // On glibc x86_64, ThreadDescriptorSize() needs to be precise due to the usage @@ -242,7 +224,7 @@ void InitTlsSize() {} // to get the pointer to thread-specific data keys in the thread control block. # if (SANITIZER_FREEBSD || SANITIZER_GLIBC) && !SANITIZER_GO // sizeof(struct pthread) from glibc. -static atomic_uintptr_t thread_descriptor_size; +static uptr thread_descriptor_size; // FIXME: Implementation is very GLIBC specific, but it's used by FREEBSD. static uptr ThreadDescriptorSizeFallback() { @@ -323,19 +305,41 @@ static uptr ThreadDescriptorSizeFallback() { # endif } -uptr ThreadDescriptorSize() { - uptr val = atomic_load_relaxed(&thread_descriptor_size); - if (val) - return val; - // _thread_db_sizeof_pthread is a GLIBC_PRIVATE symbol that is exported in - // glibc 2.34 and later. 
- if (unsigned *psizeof = static_cast<unsigned *>( - dlsym(RTLD_DEFAULT, "_thread_db_sizeof_pthread"))) - val = *psizeof; - if (!val) - val = ThreadDescriptorSizeFallback(); - atomic_store_relaxed(&thread_descriptor_size, val); - return val; +uptr ThreadDescriptorSize() { return thread_descriptor_size; } + +# if SANITIZER_GLIBC +__attribute__((unused)) static size_t g_tls_size; +# endif + +void InitTlsSize() { +# if SANITIZER_GLIBC + int major, minor, patch; + GetGLibcVersion(&major, &minor, &patch); + g_use_dlpi_tls_data = major == 2 && minor >= 25; + + if (major == 2 && minor >= 34) { + // _thread_db_sizeof_pthread is a GLIBC_PRIVATE symbol that is exported in + // glibc 2.34 and later. + if (unsigned *psizeof = static_cast<unsigned *>( + dlsym(RTLD_DEFAULT, "_thread_db_sizeof_pthread"))) { + thread_descriptor_size = *psizeof; + } + } + +# if defined(__aarch64__) || defined(__x86_64__) || \ + defined(__powerpc64__) || defined(__loongarch__) + auto *get_tls_static_info = (void (*)(size_t *, size_t *))dlsym( + RTLD_DEFAULT, "_dl_get_tls_static_info"); + size_t tls_align; + // Can be null if static link. 
+ if (get_tls_static_info) + get_tls_static_info(&g_tls_size, &tls_align); +# endif + +# endif // SANITIZER_GLIBC + + if (!thread_descriptor_size) + thread_descriptor_size = ThreadDescriptorSizeFallback(); } # if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64 || \ @@ -358,8 +362,9 @@ static uptr TlsPreTcbSize() { return kTlsPreTcbSize; } # endif - -# endif +# else // (SANITIZER_FREEBSD || SANITIZER_GLIBC) && !SANITIZER_GO +void InitTlsSize() {} +# endif // (SANITIZER_FREEBSD || SANITIZER_GLIBC) && !SANITIZER_GO # if (SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_SOLARIS) && \ !SANITIZER_ANDROID && !SANITIZER_GO diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp index 2a36104e6f9f..26d2e8d4ed76 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp @@ -545,9 +545,6 @@ uptr GetTlsSize() { return 0; } -void InitTlsSize() { -} - uptr TlsBaseAddr() { uptr segbase = 0; #if defined(__x86_64__) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp index a17a14882d0e..6f3b6af3c584 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp @@ -14,6 +14,8 @@ #include "sanitizer_allocator_interface.h" #include "sanitizer_atomic.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_internal_defs.h" #include "sanitizer_flags.h" #include "sanitizer_platform_interceptors.h" @@ -115,11 +117,14 @@ SANITIZER_INTERFACE_WEAK_DEF(uptr, __sanitizer_get_dtls_size, const void *start = __sanitizer_get_allocated_begin(tls_begin); if (!start) return 0; - CHECK_EQ(start, tls_begin); + CHECK_LE(start, tls_begin); uptr tls_size = __sanitizer_get_allocated_size(start); VReport(2, "__tls_get_addr: glibc DTLS suspected; tls={%p,0x%zx}\n", tls_begin, 
tls_size); - return tls_size; + uptr offset = + (reinterpret_cast<uptr>(tls_begin) - reinterpret_cast<uptr>(start)); + CHECK_LE(offset, tls_size); + return tls_size - offset; } DTLS::DTV *DTLS_on_tls_get_addr(void *arg_void, void *res, diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp index 2c8f8343519e..7cee57131486 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp @@ -873,9 +873,6 @@ uptr GetTlsSize() { return 0; } -void InitTlsSize() { -} - void GetThreadStackAndTls(bool main, uptr *stk_begin, uptr *stk_end, uptr *tls_begin, uptr *tls_end) { # if SANITIZER_GO diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_linux_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_linux_test.cpp index 338c4d3bab2b..b286ab72a5c7 100644 --- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_linux_test.cpp +++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_linux_test.cpp @@ -202,6 +202,8 @@ TEST(SanitizerLinux, ThreadDescriptorSize) { void *result; ASSERT_EQ(0, pthread_create(&tid, 0, thread_descriptor_size_test_func, 0)); ASSERT_EQ(0, pthread_join(tid, &result)); + EXPECT_EQ(0u, ThreadDescriptorSize()); + InitTlsSize(); EXPECT_EQ((uptr)result, ThreadDescriptorSize()); } # endif diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp index 621c679a05db..3e08a1bece98 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp @@ -418,7 +418,6 @@ void InitializePlatform() { Die(); } - InitTlsSize(); #endif // !SANITIZER_GO } diff --git a/compiler-rt/lib/ubsan/ubsan_init.cpp b/compiler-rt/lib/ubsan/ubsan_init.cpp index 5802d58896f0..aea7ca00e3cb 100644 --- a/compiler-rt/lib/ubsan/ubsan_init.cpp +++ b/compiler-rt/lib/ubsan/ubsan_init.cpp @@ -43,8 +43,8 @@ static void CommonStandaloneInit() { SanitizerToolName = 
GetSanititizerToolName(); CacheBinaryName(); InitializeFlags(); - __sanitizer::InitializePlatformEarly(); __sanitizer_set_report_path(common_flags()->log_path); + __sanitizer::InitializePlatformEarly(); AndroidLogInit(); InitializeCoverage(common_flags()->coverage, common_flags()->coverage_dir); CommonInit(); diff --git a/compiler-rt/test/rtsan/basic.cpp b/compiler-rt/test/rtsan/basic.cpp index 607db90213a3..4edf32336720 100644 --- a/compiler-rt/test/rtsan/basic.cpp +++ b/compiler-rt/test/rtsan/basic.cpp @@ -1,5 +1,4 @@ // RUN: %clangxx -fsanitize=realtime %s -o %t -// RUN: %clang -fsanitize=realtime %s -o %t // RUN: not %run %t 2>&1 | FileCheck %s // UNSUPPORTED: ios diff --git a/compiler-rt/test/rtsan/lit.cfg.py b/compiler-rt/test/rtsan/lit.cfg.py index b262ecfa7fb4..7c75515a7608 100644 --- a/compiler-rt/test/rtsan/lit.cfg.py +++ b/compiler-rt/test/rtsan/lit.cfg.py @@ -3,6 +3,22 @@ import os # Setup config name. config.name = "RTSAN" + config.name_suffix + +default_rtsan_opts = "atexit_sleep_ms=0" + +if config.host_os == "Darwin": + # On Darwin, we default to `abort_on_error=1`, which would make tests run + # much slower. Let's override this and run lit tests with 'abort_on_error=0'. + default_rtsan_opts += ":abort_on_error=0" + +if default_rtsan_opts: + config.environment["RTSAN_OPTIONS"] = default_rtsan_opts + default_rtsan_opts += ":" + +config.substitutions.append( + ("%env_rtsan_opts=", "env RTSAN_OPTIONS=" + default_rtsan_opts) +) + # Setup source root. 
config.test_source_root = os.path.dirname(__file__) diff --git a/compiler-rt/test/rtsan/sanity_check_pure_c.c b/compiler-rt/test/rtsan/sanity_check_pure_c.c new file mode 100644 index 000000000000..bdca6039d932 --- /dev/null +++ b/compiler-rt/test/rtsan/sanity_check_pure_c.c @@ -0,0 +1,28 @@ +// RUN: %clang -fsanitize=realtime %s -o %t +// RUN: not %run %t 2>&1 | FileCheck %s +// RUN: %clang %s -o %t +// RUN: %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-NO-SANITIZE +#ifdef __cplusplus +# error "This test must be built in C mode" +#endif + +#include <stdio.h> +#include <stdlib.h> + +// Check that we can build and run C code. + +void nonblocking_function(void) __attribute__((nonblocking)); + +void nonblocking_function(void) __attribute__((nonblocking)) { + void *ptr = malloc(2); + printf("ptr: %p\n", ptr); // ensure we don't optimize out the malloc +} + +int main() { + nonblocking_function(); + printf("Done\n"); + return 0; +} + +// CHECK: ==ERROR: RealtimeSanitizer +// CHECK-NO-SANITIZE: Done diff --git a/flang/include/flang/Evaluate/traverse.h b/flang/include/flang/Evaluate/traverse.h index 7f4a67d97e64..90b93f6afd35 100644 --- a/flang/include/flang/Evaluate/traverse.h +++ b/flang/include/flang/Evaluate/traverse.h @@ -217,7 +217,7 @@ public: return CombineContents(x); } Result operator()(const semantics::DerivedTypeSpec &x) const { - return Combine(x.typeSymbol(), x.parameters()); + return Combine(x.originalTypeSymbol(), x.parameters()); } Result operator()(const StructureConstructorValues::value_type &x) const { return visitor_(x.second); diff --git a/flang/include/flang/Semantics/type.h b/flang/include/flang/Semantics/type.h index e2d47d38f927..e2131e7e160c 100644 --- a/flang/include/flang/Semantics/type.h +++ b/flang/include/flang/Semantics/type.h @@ -259,6 +259,7 @@ public: DerivedTypeSpec(DerivedTypeSpec &&); const SourceName &name() const { return name_; } + const Symbol &originalTypeSymbol() const { return originalTypeSymbol_; } const Symbol 
&typeSymbol() const { return typeSymbol_; } const Scope *scope() const { return scope_; } // Return scope_ if it is set, or the typeSymbol_ scope otherwise. @@ -319,7 +320,8 @@ public: private: SourceName name_; - const Symbol &typeSymbol_; + const Symbol &originalTypeSymbol_; + const Symbol &typeSymbol_; // == originalTypeSymbol_.GetUltimate() const Scope *scope_{nullptr}; // same as typeSymbol_.scope() unless PDT bool cooked_{false}; bool evaluated_{false}; @@ -328,8 +330,9 @@ private: ParameterMapType parameters_; Category category_{Category::DerivedType}; bool RawEquals(const DerivedTypeSpec &that) const { - return &typeSymbol_ == &that.typeSymbol_ && cooked_ == that.cooked_ && - rawParameters_ == that.rawParameters_; + return &typeSymbol_ == &that.typeSymbol_ && + &originalTypeSymbol_ == &that.originalTypeSymbol_ && + cooked_ == that.cooked_ && rawParameters_ == that.rawParameters_; } friend llvm::raw_ostream &operator<<( llvm::raw_ostream &, const DerivedTypeSpec &); diff --git a/flang/lib/Evaluate/characteristics.cpp b/flang/lib/Evaluate/characteristics.cpp index 70e24d6e82eb..2496e4427fe7 100644 --- a/flang/lib/Evaluate/characteristics.cpp +++ b/flang/lib/Evaluate/characteristics.cpp @@ -66,8 +66,9 @@ bool ShapesAreCompatible(const std::optional<Shape> &x, } bool TypeAndShape::operator==(const TypeAndShape &that) const { - return type_ == that.type_ && ShapesAreCompatible(shape_, that.shape_) && - attrs_ == that.attrs_ && corank_ == that.corank_; + return type_.IsEquivalentTo(that.type_) && + ShapesAreCompatible(shape_, that.shape_) && attrs_ == that.attrs_ && + corank_ == that.corank_; } TypeAndShape &TypeAndShape::Rewrite(FoldingContext &context) { diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index cda82bcb7ecc..267c3ceb44f3 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -425,7 +425,7 @@ void PrintPreprocessedAction::executeAction() { // If a pre-defined 
output stream exists, dump the preprocessed content there if (!ci.isOutputStreamNull()) { // Send the output to the pre-defined output buffer. - ci.writeOutputStream(outForPP.str()); + ci.writeOutputStream(buf); return; } @@ -436,7 +436,7 @@ void PrintPreprocessedAction::executeAction() { return; } - (*os) << outForPP.str(); + (*os) << buf; } void DebugDumpProvenanceAction::executeAction() { @@ -756,7 +756,7 @@ getRISCVVScaleRange(CompilerInstance &ci) { outputErrMsg << errMsg.getMessage(); }); ci.getDiagnostics().Report(clang::diag::err_invalid_feature_combination) - << outputErrMsg.str(); + << buffer; return std::nullopt; } @@ -1091,8 +1091,7 @@ public: msgStream << diagInfo.getMsg(); // Emit message. - diags.Report(diagID) << clang::AddFlagValue(diagInfo.getPassName()) - << msgStream.str(); + diags.Report(diagID) << clang::AddFlagValue(diagInfo.getPassName()) << msg; } void optimizationRemarkHandler( diff --git a/flang/lib/Frontend/TextDiagnosticPrinter.cpp b/flang/lib/Frontend/TextDiagnosticPrinter.cpp index 8b00fb69b3ce..dc182d68a1a9 100644 --- a/flang/lib/Frontend/TextDiagnosticPrinter.cpp +++ b/flang/lib/Frontend/TextDiagnosticPrinter.cpp @@ -45,7 +45,7 @@ static void printRemarkOption(llvm::raw_ostream &os, // warning could be printed i.e. [-Wunknown-warning-option] os << " [" << (level == clang::DiagnosticsEngine::Remark ? 
"-R" : "-W") << opt; - llvm::StringRef optValue = info.getDiags()->getFlagValue(); + llvm::StringRef optValue = info.getFlagValue(); if (!optValue.empty()) os << "=" << optValue; os << ']'; diff --git a/flang/lib/Optimizer/Dialect/FIRType.cpp b/flang/lib/Optimizer/Dialect/FIRType.cpp index 05f644654efe..7a516298e5ef 100644 --- a/flang/lib/Optimizer/Dialect/FIRType.cpp +++ b/flang/lib/Optimizer/Dialect/FIRType.cpp @@ -533,9 +533,8 @@ int getTypeCode(mlir::Type ty, const fir::KindMapping &kindMap) { std::string getTypeAsString(mlir::Type ty, const fir::KindMapping &kindMap, llvm::StringRef prefix) { - std::string buf; + std::string buf = prefix.str(); llvm::raw_string_ostream name{buf}; - name << prefix.str(); if (!prefix.empty()) name << "_"; while (ty) { @@ -606,7 +605,7 @@ std::string getTypeAsString(mlir::Type ty, const fir::KindMapping &kindMap, llvm::report_fatal_error("unsupported type"); } } - return name.str(); + return buf; } mlir::Type changeElementType(mlir::Type type, mlir::Type newElementType, diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp index 82c6a6618e0e..1390fae062b9 100644 --- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp +++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp @@ -271,6 +271,7 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertCharacterType( uint64_t sizeInBits = 0; mlir::LLVM::DIExpressionAttr lenExpr = nullptr; mlir::LLVM::DIExpressionAttr locExpr = nullptr; + mlir::LLVM::DIVariableAttr varAttr = nullptr; if (hasDescriptor) { llvm::SmallVector<mlir::LLVM::DIExpressionElemAttr> ops; @@ -289,7 +290,29 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertCharacterType( sizeInBits = charTy.getLen() * kindMapping.getCharacterBitsize(charTy.getFKind()); } else { - return genPlaceholderType(context); + // In assumed length string, the len of the character is not part of the + // type but can be found at the runtime. 
Here we create an artificial + // variable that will contain that length. This variable is used as + // 'stringLength' in DIStringTypeAttr. + if (declOp && !declOp.getTypeparams().empty()) { + mlir::Operation *op = declOp.getTypeparams()[0].getDefiningOp(); + if (auto unbox = mlir::dyn_cast_or_null<fir::UnboxCharOp>(op)) { + auto name = + mlir::StringAttr::get(context, "." + declOp.getUniqName().str()); + mlir::OpBuilder builder(context); + builder.setInsertionPoint(declOp); + mlir::Type i64Ty = builder.getIntegerType(64); + auto convOp = builder.create<fir::ConvertOp>(unbox.getLoc(), i64Ty, + unbox.getResult(1)); + mlir::LLVM::DITypeAttr Ty = convertType(i64Ty, fileAttr, scope, declOp); + auto lvAttr = mlir::LLVM::DILocalVariableAttr::get( + context, scope, name, fileAttr, /*line=*/0, /*argNo=*/0, + /*alignInBits=*/0, Ty, mlir::LLVM::DIFlags::Artificial); + builder.create<mlir::LLVM::DbgValueOp>(convOp.getLoc(), convOp, lvAttr, + nullptr); + varAttr = mlir::cast<mlir::LLVM::DIVariableAttr>(lvAttr); + } + } } // FIXME: Currently the DIStringType in llvm does not have the option to set @@ -299,7 +322,7 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertCharacterType( return mlir::LLVM::DIStringTypeAttr::get( context, llvm::dwarf::DW_TAG_string_type, mlir::StringAttr::get(context, ""), sizeInBits, /*alignInBits=*/0, - /*stringLength=*/nullptr, lenExpr, locExpr, encoding); + /*stringLength=*/varAttr, lenExpr, locExpr, encoding); } mlir::LLVM::DITypeAttr DebugTypeGenerator::convertPointerLikeType( diff --git a/flang/lib/Parser/parsing.cpp b/flang/lib/Parser/parsing.cpp index 43a898ff120c..37dc113436aa 100644 --- a/flang/lib/Parser/parsing.cpp +++ b/flang/lib/Parser/parsing.cpp @@ -42,9 +42,9 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) { sourceFile = allSources.Open(path, fileError, "."s /*prepend to search path*/); } - if (!fileError.str().empty()) { + if (!buf.empty()) { ProvenanceRange 
range{allSources.AddCompilerInsertion(path)}; - messages_.Say(range, "%s"_err_en_US, fileError.str()); + messages_.Say(range, "%s"_err_en_US, buf); return sourceFile; } CHECK(sourceFile); diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index b852fbf12a6e..dfd49db74eea 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -2519,7 +2519,7 @@ void CheckHelper::CheckProcBinding( ? "A NOPASS type-bound procedure may not override a passed-argument procedure"_err_en_US : "A passed-argument type-bound procedure may not override a NOPASS procedure"_err_en_US); } else { - const auto *bindingChars{Characterize(binding.symbol())}; + const auto *bindingChars{Characterize(symbol)}; const auto *overriddenChars{Characterize(*overridden)}; if (bindingChars && overriddenChars) { if (isNopass) { diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index b99f308e1c7f..5414787d85f7 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -3053,11 +3053,16 @@ void ModuleVisitor::DoAddUse(SourceName location, SourceName localName, const Symbol &useUltimate{useSymbol.GetUltimate()}; const auto *useGeneric{useUltimate.detailsIf<GenericDetails>()}; if (localSymbol->has<UnknownDetails>()) { - if (useGeneric && useGeneric->specific() && - IsProcedurePointer(*useGeneric->specific())) { - // We are use-associating a generic that shadows a procedure pointer. - // Local references that might be made to that procedure pointer should - // use a UseDetails symbol for proper data addressing. So create an + if (useGeneric && + ((useGeneric->specific() && + IsProcedurePointer(*useGeneric->specific())) || + (useGeneric->derivedType() && + useUltimate.name() != localSymbol->name()))) { + // We are use-associating a generic that either shadows a procedure + // pointer or shadows a derived type of the same name. 
+ // Local references that might be made to the procedure pointer should + // use a UseDetails symbol for proper data addressing, and a derived + // type needs to be in scope with the renamed name. So create an // empty local generic now into which the use-associated generic may // be copied. localSymbol->set_details(GenericDetails{}); @@ -3153,9 +3158,15 @@ void ModuleVisitor::DoAddUse(SourceName location, SourceName localName, if (!useDerivedType) { combinedDerivedType = localDerivedType; } else if (!localDerivedType) { - combinedDerivedType = useDerivedType; + if (useDerivedType->name() == localName) { + combinedDerivedType = useDerivedType; + } else { + Symbol &combined{currScope().MakeSymbol(localName, + useDerivedType->attrs(), UseDetails{localName, *useDerivedType})}; + combinedDerivedType = &combined; + } } else { - const Scope *localScope{localDerivedType->scope()}; + const Scope *localScope{localDerivedType->GetUltimate().scope()}; const Scope *useScope{useDerivedType->GetUltimate().scope()}; if (localScope && useScope && localScope->derivedTypeSpec() && useScope->derivedTypeSpec() && @@ -4351,15 +4362,18 @@ bool SubprogramVisitor::BeginSubprogram(const parser::Name &name, Symbol::Flag subpFlag, bool hasModulePrefix, const parser::LanguageBindingSpec *bindingSpec, const ProgramTree::EntryStmtList *entryStmts) { + bool isValid{true}; if (hasModulePrefix && !currScope().IsModule() && !currScope().IsSubmodule()) { // C1547 Say(name, "'%s' is a MODULE procedure which must be declared within a " "MODULE or SUBMODULE"_err_en_US); - return false; + // Don't return here because it can be useful to have the scope set for + // other semantic checks run before we print the errors + isValid = false; } Symbol *moduleInterface{nullptr}; - if (hasModulePrefix && !inInterfaceBlock()) { + if (isValid && hasModulePrefix && !inInterfaceBlock()) { moduleInterface = FindSeparateModuleProcedureInterface(name); if (moduleInterface && &moduleInterface->owner() == &currScope()) { 
// Subprogram is MODULE FUNCTION or MODULE SUBROUTINE with an interface @@ -6773,9 +6787,7 @@ std::optional<DerivedTypeSpec> DeclarationVisitor::ResolveDerivedType( } if (CheckUseError(name)) { return std::nullopt; - } - symbol = &symbol->GetUltimate(); - if (symbol->has<DerivedTypeDetails>()) { + } else if (symbol->GetUltimate().has<DerivedTypeDetails>()) { return DerivedTypeSpec{name.source, *symbol}; } else { Say(name, "'%s' is not a derived type"_err_en_US); @@ -7117,12 +7129,10 @@ bool ConstructVisitor::Pre(const parser::DataStmtValue &x) { auto &mutableData{const_cast<parser::DataStmtConstant &>(data)}; if (auto *elem{parser::Unwrap<parser::ArrayElement>(mutableData)}) { if (const auto *name{std::get_if<parser::Name>(&elem->base.u)}) { - if (const Symbol * symbol{FindSymbol(*name)}) { - const Symbol &ultimate{symbol->GetUltimate()}; - if (ultimate.has<DerivedTypeDetails>()) { - mutableData.u = elem->ConvertToStructureConstructor( - DerivedTypeSpec{name->source, ultimate}); - } + if (const Symbol * symbol{FindSymbol(*name)}; + symbol && symbol->GetUltimate().has<DerivedTypeDetails>()) { + mutableData.u = elem->ConvertToStructureConstructor( + DerivedTypeSpec{name->source, *symbol}); } } } @@ -8738,6 +8748,9 @@ void ResolveNamesVisitor::FinishSpecificationPart( CheckImports(); for (auto &pair : currScope()) { auto &symbol{*pair.second}; + if (inInterfaceBlock()) { + ConvertToObjectEntity(symbol); + } if (NeedsExplicitType(symbol)) { ApplyImplicitRules(symbol); } diff --git a/flang/lib/Semantics/type.cpp b/flang/lib/Semantics/type.cpp index 810b9829b0b8..e867d7ad6e25 100644 --- a/flang/lib/Semantics/type.cpp +++ b/flang/lib/Semantics/type.cpp @@ -22,8 +22,9 @@ namespace Fortran::semantics { DerivedTypeSpec::DerivedTypeSpec(SourceName name, const Symbol &typeSymbol) - : name_{name}, typeSymbol_{typeSymbol} { - CHECK(typeSymbol.has<DerivedTypeDetails>()); + : name_{name}, originalTypeSymbol_{typeSymbol}, + typeSymbol_{typeSymbol.GetUltimate()} { + 
CHECK(typeSymbol_.has<DerivedTypeDetails>()); } DerivedTypeSpec::DerivedTypeSpec(const DerivedTypeSpec &that) = default; DerivedTypeSpec::DerivedTypeSpec(DerivedTypeSpec &&that) = default; @@ -340,9 +341,7 @@ void DerivedTypeSpec::Instantiate(Scope &containingScope) { const Scope &typeScope{DEREF(typeSymbol_.scope())}; if (!MightBeParameterized()) { scope_ = &typeScope; - if (typeScope.derivedTypeSpec()) { - CHECK(*this == *typeScope.derivedTypeSpec()); - } else { + if (!typeScope.derivedTypeSpec() || *this != *typeScope.derivedTypeSpec()) { Scope &mutableTypeScope{const_cast<Scope &>(typeScope)}; mutableTypeScope.set_derivedTypeSpec(*this); InstantiateNonPDTScope(mutableTypeScope, containingScope); @@ -664,7 +663,7 @@ std::string DerivedTypeSpec::VectorTypeAsFortran() const { std::string DerivedTypeSpec::AsFortran() const { std::string buf; llvm::raw_string_ostream ss{buf}; - ss << name_; + ss << originalTypeSymbol_.name(); if (!rawParameters_.empty()) { CHECK(parameters_.empty()); ss << '('; diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp index e3c6b9e5ca89..39ac8c9eb6de 100644 --- a/flang/runtime/io-api.cpp +++ b/flang/runtime/io-api.cpp @@ -948,7 +948,7 @@ bool IODEF(SetRecl)(Cookie cookie, std::size_t n) { io.GetIoErrorHandler().Crash( "SetRecl() called after GetNewUnit() for an OPEN statement"); } - if (n <= 0) { + if (static_cast<std::int64_t>(n) <= 0) { io.GetIoErrorHandler().SignalError("RECL= must be greater than zero"); return false; } else if (open->wasExtant() && diff --git a/flang/runtime/io-stmt.cpp b/flang/runtime/io-stmt.cpp index 265bd0dc9d94..cd7a196335d3 100644 --- a/flang/runtime/io-stmt.cpp +++ b/flang/runtime/io-stmt.cpp @@ -329,8 +329,11 @@ void OpenStatementState::CompleteOperation() { } if (!wasExtant_ && InError()) { // Release the new unit on failure - unit().CloseUnit(CloseStatus::Delete, *this); - unit().DestroyClosed(); + if (ExternalFileUnit * + toClose{unit().LookUpForClose(unit().unitNumber())}) { + 
toClose->Close(CloseStatus::Delete, *this); + toClose->DestroyClosed(); + } } IoStatementBase::CompleteOperation(); } diff --git a/flang/test/Semantics/OpenMP/bad_module_subroutine.f90 b/flang/test/Semantics/OpenMP/bad_module_subroutine.f90 new file mode 100644 index 000000000000..339d6bf27e7d --- /dev/null +++ b/flang/test/Semantics/OpenMP/bad_module_subroutine.f90 @@ -0,0 +1,6 @@ +! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp +! Test that we don't crash on this code inside of openmp semantics checks + +!ERROR: 'e' is a MODULE procedure which must be declared within a MODULE or SUBMODULE +impure elemental module subroutine e() +end subroutine diff --git a/flang/test/Semantics/get_team.f90 b/flang/test/Semantics/get_team.f90 index a28b0d72f23f..7e4886703d17 100644 --- a/flang/test/Semantics/get_team.f90 +++ b/flang/test/Semantics/get_team.f90 @@ -49,7 +49,7 @@ program get_team_test !ERROR: repeated keyword argument to intrinsic 'get_team' result_team = get_team(level=initial_team, level=parent_team) - !ERROR: No intrinsic or user-defined ASSIGNMENT(=) matches operand types LOGICAL(4) and TYPE(__builtin_team_type) + !ERROR: No intrinsic or user-defined ASSIGNMENT(=) matches operand types LOGICAL(4) and TYPE(team_type) wrong_result_type = get_team() end program get_team_test diff --git a/flang/test/Semantics/implicit16.f90 b/flang/test/Semantics/implicit16.f90 new file mode 100644 index 000000000000..4a03e0c15747 --- /dev/null +++ b/flang/test/Semantics/implicit16.f90 @@ -0,0 +1,12 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +interface +!ERROR: No explicit type declared for 'a' + subroutine s(a) + implicit none + end +!ERROR: No explicit type declared for 'f' + function f() + implicit none + end +end interface +end diff --git a/flang/test/Semantics/modfile68.f90 b/flang/test/Semantics/modfile68.f90 new file mode 100644 index 000000000000..550560303f08 --- /dev/null +++ b/flang/test/Semantics/modfile68.f90 @@ -0,0 +1,42 @@ +! 
RUN: %python %S/test_modfile.py %s %flang_fc1 +module m1 + use iso_c_binding, only : c_ptr, c_null_ptr + private + public :: t1 + type :: t1 + type(c_ptr) :: c_ptr = c_null_ptr + end type +end + +!Expect: m1.mod +!module m1 +!use,intrinsic::__fortran_builtins,only:__builtin_c_ptr +!use,intrinsic::iso_c_binding,only:c_ptr +!use,intrinsic::iso_c_binding,only:c_null_ptr +!private::__builtin_c_ptr +!private::c_ptr +!private::c_null_ptr +!type::t1 +!type(c_ptr)::c_ptr=__builtin_c_ptr(__address=0_8) +!end type +!end + +module m2 + use m1, only : t1 + private + public :: t2 + type :: t2 + type(t1) :: x = t1() + end type +end + +!Expect: m2.mod +!module m2 +!use,intrinsic::__fortran_builtins,only:__builtin_c_ptr +!use m1,only:t1 +!private::__builtin_c_ptr +!private::t1 +!type::t2 +!type(t1)::x=t1(c_ptr=__builtin_c_ptr(__address=0_8)) +!end type +!end diff --git a/flang/test/Semantics/modproc01.f90 b/flang/test/Semantics/modproc01.f90 index 5652e15750c7..5f45362e9509 100644 --- a/flang/test/Semantics/modproc01.f90 +++ b/flang/test/Semantics/modproc01.f90 @@ -144,8 +144,12 @@ end program !CHECK: a1, ALLOCATABLE size=40 offset=0: ObjectEntity type: TYPE(pdt2(k2=1_4,l2=3_4)) !CHECK: k1: TypeParam type:INTEGER(4) Kind init:1_4 !CHECK: l1: TypeParam type:INTEGER(4) Len init:3_4 -!CHECK: DerivedType scope: size=1 alignment=1 instantiation of pdt2(k2=1_4,l2=3_4) -!CHECK: a2: ObjectEntity type: TYPE(pdt1(k1=1_4,l1=3_4)) shape: 1_8:1_8 +!CHECK: DerivedType scope: size=48 alignment=8 instantiation of pdt2(k2=1_4,l2=3_4) sourceRange=0 bytes +!CHECK: a2 size=40 offset=8: ObjectEntity type: TYPE(pdt1(k1=1_4,l1=3_4)) shape: 1_8:1_8 !CHECK: j2 size=1 offset=0: ObjectEntity type: INTEGER(1) !CHECK: k2: TypeParam type:INTEGER(4) Kind init:1_4 !CHECK: l2: TypeParam type:INTEGER(4) Len init:3_4 +!CHECK: DerivedType scope: size=40 alignment=8 instantiation of pdt1(k1=1_4,l1=3_4) sourceRange=0 bytes +!CHECK: a1, ALLOCATABLE size=40 offset=0: ObjectEntity type: TYPE(pdt2(k2=1_4,l2=3_4)) +!CHECK: 
k1: TypeParam type:INTEGER(4) Kind init:1_4 +!CHECK: l1: TypeParam type:INTEGER(4) Len init:3_4 diff --git a/flang/test/Transforms/debug-107988.fir b/flang/test/Transforms/debug-107988.fir new file mode 100644 index 000000000000..308f78a86512 --- /dev/null +++ b/flang/test/Transforms/debug-107988.fir @@ -0,0 +1,23 @@ +// RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s -o - | FileCheck %s + +module attributes {dlti.dl_spec = #dlti.dl_spec<>} { + func.func @test(%arg0: !fir.ref<!fir.char<1,?>> {fir.bindc_name = "str"}, %arg1: i64) { + %0 = fir.emboxchar %arg0, %arg1 : (!fir.ref<!fir.char<1,?>>, i64) -> !fir.boxchar<1> + %1 = fir.undefined !fir.dscope + %2:2 = fir.unboxchar %0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) loc(#loc1) + %3 = fircg.ext_declare %2#0 typeparams %2#1 dummy_scope %1 {uniq_name = "_QFtestEstr"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> !fir.ref<!fir.char<1,?>> loc(#loc1) + return + } loc(#loc2) +} + +#loc1 = loc("test.f90":5:1) +#loc2 = loc("test.f90":15:1) + +// CHECK: #[[VAR:.*]] = #llvm.di_local_variable<{{.*}}name = "._QFtestEstr"{{.*}}flags = Artificial> +// CHECK: func.func @test +// CHECK: %[[V1:.*]]:2 = fir.unboxchar{{.*}} +// CHECK: %[[V2:.*]] = fir.convert %[[V1]]#1 : (index) -> i64 +// CHECK: llvm.intr.dbg.value #di_local_variable = %[[V2]] : i64 +// CHECK: #[[STR_TY:.*]] = #llvm.di_string_type<tag = DW_TAG_string_type, name = "", stringLength = #[[VAR]], encoding = DW_ATE_ASCII> +// CHECK: #llvm.di_local_variable<{{.*}}name = "str"{{.*}}type = #[[STR_TY]]> + diff --git a/libc/cmake/modules/prepare_libc_gpu_build.cmake b/libc/cmake/modules/prepare_libc_gpu_build.cmake index 14ae8f6e9eec..e20591b80e6f 100644 --- a/libc/cmake/modules/prepare_libc_gpu_build.cmake +++ b/libc/cmake/modules/prepare_libc_gpu_build.cmake @@ -21,10 +21,10 @@ if(LIBC_TARGET_TRIPLE) set(CMAKE_REQUIRED_FLAGS "--target=${LIBC_TARGET_TRIPLE}") endif() if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) - set(CMAKE_REQUIRED_FLAGS 
"${CMAKE_REQUIRED_FLAGS} -nogpulib -nostdlib") + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nogpulib") elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX) set(CMAKE_REQUIRED_FLAGS - "${CMAKE_REQUIRED_FLAGS} -flto -c -Wno-unused-command-line-argument -nostdlib") + "${CMAKE_REQUIRED_FLAGS} -flto -c -Wno-unused-command-line-argument") endif() # Optionally set up a job pool to limit the number of GPU tests run in parallel. diff --git a/libcxx/cmake/caches/AMDGPU.cmake b/libcxx/cmake/caches/AMDGPU.cmake index 0cd2eebfb9c1..7443470b2e8a 100644 --- a/libcxx/cmake/caches/AMDGPU.cmake +++ b/libcxx/cmake/caches/AMDGPU.cmake @@ -33,4 +33,4 @@ set(LIBCXX_ADDITIONAL_COMPILE_FLAGS "-nogpulib;-flto;-fconvergent-functions;-Xclang;-mcode-object-version=none" CACHE STRING "") set(LIBCXXABI_ADDITIONAL_COMPILE_FLAGS "-nogpulib;-flto;-fconvergent-functions;-Xclang;-mcode-object-version=none" CACHE STRING "") -set(CMAKE_REQUIRED_FLAGS "-nogpulib -nodefaultlibs" CACHE STRING "") +set(CMAKE_REQUIRED_FLAGS "-nogpulib" CACHE STRING "") diff --git a/libcxx/cmake/caches/NVPTX.cmake b/libcxx/cmake/caches/NVPTX.cmake index 47a24a349e99..3685ddcbb666 100644 --- a/libcxx/cmake/caches/NVPTX.cmake +++ b/libcxx/cmake/caches/NVPTX.cmake @@ -33,4 +33,4 @@ set(LIBCXX_ADDITIONAL_COMPILE_FLAGS "-nogpulib;-flto;-fconvergent-functions;--cuda-feature=+ptx63" CACHE STRING "") set(LIBCXXABI_ADDITIONAL_COMPILE_FLAGS "-nogpulib;-flto;-fconvergent-functions;--cuda-feature=+ptx63" CACHE STRING "") -set(CMAKE_REQUIRED_FLAGS "-nogpulib -nodefaultlibs -flto -c" CACHE STRING "") +set(CMAKE_REQUIRED_FLAGS "-nogpulib -flto -c" CACHE STRING "") diff --git a/libcxx/include/__algorithm/mismatch.h b/libcxx/include/__algorithm/mismatch.h index 006ddbd80c30..cb83347584b1 100644 --- a/libcxx/include/__algorithm/mismatch.h +++ b/libcxx/include/__algorithm/mismatch.h @@ -79,7 +79,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) { } for (size_t __i = 0; __i != __unroll_count; ++__i) { - if (auto __cmp_res 
= std::__as_mask(__lhs[__i] == __rhs[__i]); !std::__all_of(__cmp_res)) { + if (auto __cmp_res = __lhs[__i] == __rhs[__i]; !std::__all_of(__cmp_res)) { auto __offset = __i * __vec_size + std::__find_first_not_set(__cmp_res); return {__first1 + __offset, __first2 + __offset}; } @@ -91,7 +91,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) { // check the remaining 0-3 vectors while (static_cast<size_t>(__last1 - __first1) >= __vec_size) { - if (auto __cmp_res = std::__as_mask(std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2)); + if (auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2); !std::__all_of(__cmp_res)) { auto __offset = std::__find_first_not_set(__cmp_res); return {__first1 + __offset, __first2 + __offset}; @@ -108,8 +108,8 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) { if (static_cast<size_t>(__first1 - __orig_first1) >= __vec_size) { __first1 = __last1 - __vec_size; __first2 = __last2 - __vec_size; - auto __offset = std::__find_first_not_set( - std::__as_mask(std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2))); + auto __offset = + std::__find_first_not_set(std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2)); return {__first1 + __offset, __first2 + __offset}; } // else loop over the elements individually } diff --git a/libcxx/include/__algorithm/simd_utils.h b/libcxx/include/__algorithm/simd_utils.h index ec9840f60d87..56518dafa319 100644 --- a/libcxx/include/__algorithm/simd_utils.h +++ b/libcxx/include/__algorithm/simd_utils.h @@ -116,65 +116,42 @@ template <class _VecT, class _Iter> }(make_index_sequence<__simd_vector_size_v<_VecT>>{}); } -template <size_t _Np> -[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<bool, _Np> __vec) noexcept { - return __builtin_reduce_and(__vec); -} - template <class _Tp, size_t _Np> -[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI auto 
__as_mask(__simd_vector<_Tp, _Np> __vec) noexcept { - static_assert(!is_same<_Tp, bool>::value, "vector type should not be a bool!"); - return __builtin_convertvector(__vec, __simd_vector<bool, _Np>); -} - -// This uses __builtin_convertvector around the __builtin_shufflevector to work around #107981. -template <size_t _Np> -[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI __simd_vector<bool, 8> -__extend_vector(__simd_vector<bool, _Np> __vec) noexcept { - using _VecT = __simd_vector<bool, _Np>; - if constexpr (_Np == 4) { - return __builtin_convertvector( - __builtin_shufflevector(__vec, _VecT{}, 0, 1, 2, 3, 4, 5, 6, 7), __simd_vector<bool, 8>); - } else if constexpr (_Np == 2) { - return std::__extend_vector( - __builtin_convertvector(__builtin_shufflevector(__vec, _VecT{}, 0, 1, 2, 3), __simd_vector<bool, 4>)); - } else if constexpr (_Np == 1) { - return std::__extend_vector( - __builtin_convertvector(__builtin_shufflevector(__vec, _VecT{}, 0, 1), __simd_vector<bool, 2>)); - } else { - static_assert(sizeof(_VecT) == 0, "Unexpected vector size"); - } +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<_Tp, _Np> __vec) noexcept { + return __builtin_reduce_and(__builtin_convertvector(__vec, __simd_vector<bool, _Np>)); } -template <size_t _Np> -[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI auto __to_int_mask(__simd_vector<bool, _Np> __vec) { - if constexpr (_Np < 8) { - return std::__bit_cast<uint8_t>(std::__extend_vector(__vec)); - } else if constexpr (_Np == 8) { - return std::__bit_cast<uint8_t>(__vec); - } else if constexpr (_Np == 16) { - return std::__bit_cast<uint16_t>(__vec); - } else if constexpr (_Np == 32) { - return std::__bit_cast<uint32_t>(__vec); - } else if constexpr (_Np == 64) { - return std::__bit_cast<uint64_t>(__vec); - } else { - static_assert(sizeof(__simd_vector<bool, _Np>) == 0, "Unexpected vector size"); - return 0; - } -} +template <class _Tp, size_t _Np> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_Tp, 
_Np> __vec) noexcept { + using __mask_vec = __simd_vector<bool, _Np>; -template <size_t _Np> -[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<bool, _Np> __vec) noexcept { + // This has MSan disabled du to https://github.com/llvm/llvm-project/issues/85876 + auto __impl = [&]<class _MaskT>(_MaskT) _LIBCPP_NO_SANITIZE("memory") noexcept { # if defined(_LIBCPP_BIG_ENDIAN) - return std::min<size_t>(_Np, std::__countl_zero(std::__to_int_mask(__vec))); + return std::min<size_t>( + _Np, std::__countl_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec)))); # else - return std::min<size_t>(_Np, std::__countr_zero(std::__to_int_mask(__vec))); + return std::min<size_t>( + _Np, std::__countr_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec)))); # endif + }; + + if constexpr (sizeof(__mask_vec) == sizeof(uint8_t)) { + return __impl(uint8_t{}); + } else if constexpr (sizeof(__mask_vec) == sizeof(uint16_t)) { + return __impl(uint16_t{}); + } else if constexpr (sizeof(__mask_vec) == sizeof(uint32_t)) { + return __impl(uint32_t{}); + } else if constexpr (sizeof(__mask_vec) == sizeof(uint64_t)) { + return __impl(uint64_t{}); + } else { + static_assert(sizeof(__mask_vec) == 0, "unexpected required size for mask integer type"); + return 0; + } } -template <size_t _Np> -[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_not_set(__simd_vector<bool, _Np> __vec) noexcept { +template <class _Tp, size_t _Np> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_not_set(__simd_vector<_Tp, _Np> __vec) noexcept { return std::__find_first_set(~__vec); } diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h index 5fa93f57ef9e..a20b097cbe04 100644 --- a/lld/COFF/InputFiles.h +++ b/lld/COFF/InputFiles.h @@ -349,7 +349,7 @@ public: MachineTypes getMachineType() const override; DefinedImportData *impSym = nullptr; - Symbol *thunkSym = nullptr; + Defined *thunkSym = nullptr; ImportThunkChunkARM64EC 
*impchkThunk = nullptr; std::string dllName; @@ -365,7 +365,7 @@ public: // Auxiliary IAT symbols and chunks on ARM64EC. DefinedImportData *impECSym = nullptr; Chunk *auxLocation = nullptr; - Symbol *auxThunkSym = nullptr; + Defined *auxThunkSym = nullptr; DefinedImportData *auxImpCopySym = nullptr; Chunk *auxCopyLocation = nullptr; diff --git a/lld/COFF/MapFile.cpp b/lld/COFF/MapFile.cpp index 52e9ce996f23..751a2238e701 100644 --- a/lld/COFF/MapFile.cpp +++ b/lld/COFF/MapFile.cpp @@ -128,8 +128,7 @@ static void getSymbols(const COFFLinkerContext &ctx, if (!file->thunkSym->isLive()) continue; - if (auto *thunkSym = dyn_cast<Defined>(file->thunkSym)) - syms.push_back(thunkSym); + syms.push_back(file->thunkSym); if (auto *impSym = dyn_cast_or_null<Defined>(file->impSym)) syms.push_back(impSym); diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index efea16ccbbfe..0ef58910151c 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -502,6 +502,14 @@ void SymbolTable::resolveRemainingUndefines() { // This odd rule is for compatibility with MSVC linker. if (name.starts_with("__imp_")) { Symbol *imp = find(name.substr(strlen("__imp_"))); + if (imp) { + // The unprefixed symbol might come later in symMap, so handle it now + // so that the condition below can be appropriately applied. 
+ auto *undef = dyn_cast<Undefined>(imp); + if (undef) { + undef->resolveWeakAlias(); + } + } if (imp && isa<Defined>(imp)) { auto *d = cast<Defined>(imp); replaceSymbol<DefinedLocalImport>(sym, ctx, name, d); @@ -815,13 +823,13 @@ DefinedImportData *SymbolTable::addImportData(StringRef n, ImportFile *f, return nullptr; } -Symbol *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id, - ImportThunkChunk *chunk) { +Defined *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id, + ImportThunkChunk *chunk) { auto [s, wasInserted] = insert(name, nullptr); s->isUsedInRegularObj = true; if (wasInserted || isa<Undefined>(s) || s->isLazy()) { replaceSymbol<DefinedImportThunk>(s, ctx, name, id, chunk); - return s; + return cast<Defined>(s); } reportDuplicate(s, id->file); diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h index bf97cf442039..e3f674b8098f 100644 --- a/lld/COFF/SymbolTable.h +++ b/lld/COFF/SymbolTable.h @@ -105,8 +105,8 @@ public: CommonChunk *c = nullptr); DefinedImportData *addImportData(StringRef n, ImportFile *f, Chunk *&location); - Symbol *addImportThunk(StringRef name, DefinedImportData *s, - ImportThunkChunk *chunk); + Defined *addImportThunk(StringRef name, DefinedImportData *s, + ImportThunkChunk *chunk); void addLibcall(StringRef name); void addEntryThunk(Symbol *from, Symbol *to); void addExitThunk(Symbol *from, Symbol *to); diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index d50eae9c0144..e25db0e49512 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -82,7 +82,7 @@ ConfigWrapper elf::config; Ctx elf::ctx; static void setConfigs(Ctx &ctx, opt::InputArgList &args); -static void readConfigs(opt::InputArgList &args); +static void readConfigs(Ctx &ctx, opt::InputArgList &args); void elf::errorOrWarn(const Twine &msg) { if (config->noinhibitExec) @@ -669,7 +669,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { } } - readConfigs(args); + readConfigs(ctx, args); checkZOptions(args); // 
The behavior of -v or --version is a bit strange, but this is @@ -681,7 +681,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { // Initialize time trace profiler. if (ctx.arg.timeTraceEnabled) - timeTraceProfilerInitialize(ctx.arg.timeTraceGranularity, config->progName); + timeTraceProfilerInitialize(ctx.arg.timeTraceGranularity, ctx.arg.progName); { llvm::TimeTraceScope timeScope("ExecuteLinker"); @@ -714,14 +714,14 @@ static std::string getRpath(opt::InputArgList &args) { // Determines what we should do if there are remaining unresolved // symbols after the name resolution. -static void setUnresolvedSymbolPolicy(opt::InputArgList &args) { +static void setUnresolvedSymbolPolicy(Ctx &ctx, opt::InputArgList &args) { UnresolvedPolicy errorOrWarn = args.hasFlag(OPT_error_unresolved_symbols, OPT_warn_unresolved_symbols, true) ? UnresolvedPolicy::ReportError : UnresolvedPolicy::Warn; // -shared implies --unresolved-symbols=ignore-all because missing // symbols are likely to be resolved at runtime. - bool diagRegular = !config->shared, diagShlib = !config->shared; + bool diagRegular = !ctx.arg.shared, diagShlib = !ctx.arg.shared; for (const opt::Arg *arg : args) { switch (arg->getOption().getID()) { @@ -765,9 +765,9 @@ static void setUnresolvedSymbolPolicy(opt::InputArgList &args) { } } - config->unresolvedSymbols = + ctx.arg.unresolvedSymbols = diagRegular ? errorOrWarn : UnresolvedPolicy::Ignore; - config->unresolvedSymbolsInShlib = + ctx.arg.unresolvedSymbolsInShlib = diagShlib ? 
errorOrWarn : UnresolvedPolicy::Ignore; } @@ -804,25 +804,25 @@ static DiscardPolicy getDiscard(opt::InputArgList &args) { return DiscardPolicy::None; } -static StringRef getDynamicLinker(opt::InputArgList &args) { +static StringRef getDynamicLinker(Ctx &ctx, opt::InputArgList &args) { auto *arg = args.getLastArg(OPT_dynamic_linker, OPT_no_dynamic_linker); if (!arg) return ""; if (arg->getOption().getID() == OPT_no_dynamic_linker) { // --no-dynamic-linker suppresses undefined weak symbols in .dynsym - config->noDynamicLinker = true; + ctx.arg.noDynamicLinker = true; return ""; } return arg->getValue(); } -static int getMemtagMode(opt::InputArgList &args) { +static int getMemtagMode(Ctx &ctx, opt::InputArgList &args) { StringRef memtagModeArg = args.getLastArgValue(OPT_android_memtag_mode); if (memtagModeArg.empty()) { - if (config->androidMemtagStack) + if (ctx.arg.androidMemtagStack) warn("--android-memtag-mode is unspecified, leaving " "--android-memtag-stack a no-op"); - else if (config->androidMemtagHeap) + else if (ctx.arg.androidMemtagHeap) warn("--android-memtag-mode is unspecified, leaving " "--android-memtag-heap a no-op"); return ELF::NT_MEMTAG_LEVEL_NONE; @@ -1247,140 +1247,141 @@ static bool remapInputs(StringRef line, const Twine &location) { } // Initializes Config members by the command line options. 
-static void readConfigs(opt::InputArgList &args) { +static void readConfigs(Ctx &ctx, opt::InputArgList &args) { errorHandler().verbose = args.hasArg(OPT_verbose); errorHandler().vsDiagnostics = args.hasArg(OPT_visual_studio_diagnostics_format, false); - config->allowMultipleDefinition = + ctx.arg.allowMultipleDefinition = hasZOption(args, "muldefs") || args.hasFlag(OPT_allow_multiple_definition, OPT_no_allow_multiple_definition, false); - config->androidMemtagHeap = + ctx.arg.androidMemtagHeap = args.hasFlag(OPT_android_memtag_heap, OPT_no_android_memtag_heap, false); - config->androidMemtagStack = args.hasFlag(OPT_android_memtag_stack, + ctx.arg.androidMemtagStack = args.hasFlag(OPT_android_memtag_stack, OPT_no_android_memtag_stack, false); - config->fatLTOObjects = + ctx.arg.fatLTOObjects = args.hasFlag(OPT_fat_lto_objects, OPT_no_fat_lto_objects, false); - config->androidMemtagMode = getMemtagMode(args); - config->auxiliaryList = args::getStrings(args, OPT_auxiliary); - config->armBe8 = args.hasArg(OPT_be8); + ctx.arg.androidMemtagMode = getMemtagMode(ctx, args); + ctx.arg.auxiliaryList = args::getStrings(args, OPT_auxiliary); + ctx.arg.armBe8 = args.hasArg(OPT_be8); if (opt::Arg *arg = args.getLastArg( OPT_Bno_symbolic, OPT_Bsymbolic_non_weak_functions, OPT_Bsymbolic_functions, OPT_Bsymbolic_non_weak, OPT_Bsymbolic)) { if (arg->getOption().matches(OPT_Bsymbolic_non_weak_functions)) - config->bsymbolic = BsymbolicKind::NonWeakFunctions; + ctx.arg.bsymbolic = BsymbolicKind::NonWeakFunctions; else if (arg->getOption().matches(OPT_Bsymbolic_functions)) - config->bsymbolic = BsymbolicKind::Functions; + ctx.arg.bsymbolic = BsymbolicKind::Functions; else if (arg->getOption().matches(OPT_Bsymbolic_non_weak)) - config->bsymbolic = BsymbolicKind::NonWeak; + ctx.arg.bsymbolic = BsymbolicKind::NonWeak; else if (arg->getOption().matches(OPT_Bsymbolic)) - config->bsymbolic = BsymbolicKind::All; + ctx.arg.bsymbolic = BsymbolicKind::All; } - config->callGraphProfileSort = 
getCGProfileSortKind(args); - config->checkSections = + ctx.arg.callGraphProfileSort = getCGProfileSortKind(args); + ctx.arg.checkSections = args.hasFlag(OPT_check_sections, OPT_no_check_sections, true); - config->chroot = args.getLastArgValue(OPT_chroot); + ctx.arg.chroot = args.getLastArgValue(OPT_chroot); if (auto *arg = args.getLastArg(OPT_compress_debug_sections)) { - config->compressDebugSections = + ctx.arg.compressDebugSections = getCompressionType(arg->getValue(), "--compress-debug-sections"); } - config->cref = args.hasArg(OPT_cref); - config->optimizeBBJumps = + ctx.arg.cref = args.hasArg(OPT_cref); + ctx.arg.optimizeBBJumps = args.hasFlag(OPT_optimize_bb_jumps, OPT_no_optimize_bb_jumps, false); - config->debugNames = args.hasFlag(OPT_debug_names, OPT_no_debug_names, false); - config->demangle = args.hasFlag(OPT_demangle, OPT_no_demangle, true); - config->dependencyFile = args.getLastArgValue(OPT_dependency_file); - config->dependentLibraries = args.hasFlag(OPT_dependent_libraries, OPT_no_dependent_libraries, true); - config->disableVerify = args.hasArg(OPT_disable_verify); - config->discard = getDiscard(args); - config->dwoDir = args.getLastArgValue(OPT_plugin_opt_dwo_dir_eq); - config->dynamicLinker = getDynamicLinker(args); - config->ehFrameHdr = + ctx.arg.debugNames = args.hasFlag(OPT_debug_names, OPT_no_debug_names, false); + ctx.arg.demangle = args.hasFlag(OPT_demangle, OPT_no_demangle, true); + ctx.arg.dependencyFile = args.getLastArgValue(OPT_dependency_file); + ctx.arg.dependentLibraries = + args.hasFlag(OPT_dependent_libraries, OPT_no_dependent_libraries, true); + ctx.arg.disableVerify = args.hasArg(OPT_disable_verify); + ctx.arg.discard = getDiscard(args); + ctx.arg.dwoDir = args.getLastArgValue(OPT_plugin_opt_dwo_dir_eq); + ctx.arg.dynamicLinker = getDynamicLinker(ctx, args); + ctx.arg.ehFrameHdr = args.hasFlag(OPT_eh_frame_hdr, OPT_no_eh_frame_hdr, false); - config->emitLLVM = args.hasArg(OPT_lto_emit_llvm); - config->emitRelocs = 
args.hasArg(OPT_emit_relocs); - config->enableNewDtags = + ctx.arg.emitLLVM = args.hasArg(OPT_lto_emit_llvm); + ctx.arg.emitRelocs = args.hasArg(OPT_emit_relocs); + ctx.arg.enableNewDtags = args.hasFlag(OPT_enable_new_dtags, OPT_disable_new_dtags, true); - config->enableNonContiguousRegions = + ctx.arg.enableNonContiguousRegions = args.hasArg(OPT_enable_non_contiguous_regions); - config->entry = args.getLastArgValue(OPT_entry); + ctx.arg.entry = args.getLastArgValue(OPT_entry); errorHandler().errorHandlingScript = args.getLastArgValue(OPT_error_handling_script); - config->executeOnly = + ctx.arg.executeOnly = args.hasFlag(OPT_execute_only, OPT_no_execute_only, false); - config->exportDynamic = + ctx.arg.exportDynamic = args.hasFlag(OPT_export_dynamic, OPT_no_export_dynamic, false) || args.hasArg(OPT_shared); - config->filterList = args::getStrings(args, OPT_filter); - config->fini = args.getLastArgValue(OPT_fini, "_fini"); - config->fixCortexA53Errata843419 = args.hasArg(OPT_fix_cortex_a53_843419) && - !args.hasArg(OPT_relocatable); - config->cmseImplib = args.hasArg(OPT_cmse_implib); - config->cmseInputLib = args.getLastArgValue(OPT_in_implib); - config->cmseOutputLib = args.getLastArgValue(OPT_out_implib); - config->fixCortexA8 = + ctx.arg.filterList = args::getStrings(args, OPT_filter); + ctx.arg.fini = args.getLastArgValue(OPT_fini, "_fini"); + ctx.arg.fixCortexA53Errata843419 = + args.hasArg(OPT_fix_cortex_a53_843419) && !args.hasArg(OPT_relocatable); + ctx.arg.cmseImplib = args.hasArg(OPT_cmse_implib); + ctx.arg.cmseInputLib = args.getLastArgValue(OPT_in_implib); + ctx.arg.cmseOutputLib = args.getLastArgValue(OPT_out_implib); + ctx.arg.fixCortexA8 = args.hasArg(OPT_fix_cortex_a8) && !args.hasArg(OPT_relocatable); - config->fortranCommon = + ctx.arg.fortranCommon = args.hasFlag(OPT_fortran_common, OPT_no_fortran_common, false); - config->gcSections = args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, false); - config->gnuUnique = args.hasFlag(OPT_gnu_unique, 
OPT_no_gnu_unique, true); - config->gdbIndex = args.hasFlag(OPT_gdb_index, OPT_no_gdb_index, false); - config->icf = getICF(args); - config->ignoreDataAddressEquality = + ctx.arg.gcSections = args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, false); + ctx.arg.gnuUnique = args.hasFlag(OPT_gnu_unique, OPT_no_gnu_unique, true); + ctx.arg.gdbIndex = args.hasFlag(OPT_gdb_index, OPT_no_gdb_index, false); + ctx.arg.icf = getICF(args); + ctx.arg.ignoreDataAddressEquality = args.hasArg(OPT_ignore_data_address_equality); - config->ignoreFunctionAddressEquality = + ctx.arg.ignoreFunctionAddressEquality = args.hasArg(OPT_ignore_function_address_equality); - config->init = args.getLastArgValue(OPT_init, "_init"); - config->ltoAAPipeline = args.getLastArgValue(OPT_lto_aa_pipeline); - config->ltoCSProfileGenerate = args.hasArg(OPT_lto_cs_profile_generate); - config->ltoCSProfileFile = args.getLastArgValue(OPT_lto_cs_profile_file); - config->ltoPGOWarnMismatch = args.hasFlag(OPT_lto_pgo_warn_mismatch, + ctx.arg.init = args.getLastArgValue(OPT_init, "_init"); + ctx.arg.ltoAAPipeline = args.getLastArgValue(OPT_lto_aa_pipeline); + ctx.arg.ltoCSProfileGenerate = args.hasArg(OPT_lto_cs_profile_generate); + ctx.arg.ltoCSProfileFile = args.getLastArgValue(OPT_lto_cs_profile_file); + ctx.arg.ltoPGOWarnMismatch = args.hasFlag(OPT_lto_pgo_warn_mismatch, OPT_no_lto_pgo_warn_mismatch, true); - config->ltoDebugPassManager = args.hasArg(OPT_lto_debug_pass_manager); - config->ltoEmitAsm = args.hasArg(OPT_lto_emit_asm); - config->ltoNewPmPasses = args.getLastArgValue(OPT_lto_newpm_passes); - config->ltoWholeProgramVisibility = + ctx.arg.ltoDebugPassManager = args.hasArg(OPT_lto_debug_pass_manager); + ctx.arg.ltoEmitAsm = args.hasArg(OPT_lto_emit_asm); + ctx.arg.ltoNewPmPasses = args.getLastArgValue(OPT_lto_newpm_passes); + ctx.arg.ltoWholeProgramVisibility = args.hasFlag(OPT_lto_whole_program_visibility, OPT_no_lto_whole_program_visibility, false); - config->ltoValidateAllVtablesHaveTypeInfos = + 
ctx.arg.ltoValidateAllVtablesHaveTypeInfos = args.hasFlag(OPT_lto_validate_all_vtables_have_type_infos, OPT_no_lto_validate_all_vtables_have_type_infos, false); - config->ltoo = args::getInteger(args, OPT_lto_O, 2); - if (config->ltoo > 3) - error("invalid optimization level for LTO: " + Twine(config->ltoo)); + ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2); + if (ctx.arg.ltoo > 3) + error("invalid optimization level for LTO: " + Twine(ctx.arg.ltoo)); unsigned ltoCgo = - args::getInteger(args, OPT_lto_CGO, args::getCGOptLevel(config->ltoo)); + args::getInteger(args, OPT_lto_CGO, args::getCGOptLevel(ctx.arg.ltoo)); if (auto level = CodeGenOpt::getLevel(ltoCgo)) - config->ltoCgo = *level; + ctx.arg.ltoCgo = *level; else error("invalid codegen optimization level for LTO: " + Twine(ltoCgo)); - config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq); - config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1); - config->ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile); - config->ltoBBAddrMap = + ctx.arg.ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq); + ctx.arg.ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1); + ctx.arg.ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile); + ctx.arg.ltoBBAddrMap = args.hasFlag(OPT_lto_basic_block_address_map, OPT_no_lto_basic_block_address_map, false); - config->ltoBasicBlockSections = + ctx.arg.ltoBasicBlockSections = args.getLastArgValue(OPT_lto_basic_block_sections); - config->ltoUniqueBasicBlockSectionNames = + ctx.arg.ltoUniqueBasicBlockSectionNames = args.hasFlag(OPT_lto_unique_basic_block_section_names, OPT_no_lto_unique_basic_block_section_names, false); - config->mapFile = args.getLastArgValue(OPT_Map); - config->mipsGotSize = args::getInteger(args, OPT_mips_got_size, 0xfff0); - config->mergeArmExidx = + ctx.arg.mapFile = args.getLastArgValue(OPT_Map); + ctx.arg.mipsGotSize = args::getInteger(args, OPT_mips_got_size, 0xfff0); + ctx.arg.mergeArmExidx = 
args.hasFlag(OPT_merge_exidx_entries, OPT_no_merge_exidx_entries, true); - config->mmapOutputFile = + ctx.arg.mmapOutputFile = args.hasFlag(OPT_mmap_output_file, OPT_no_mmap_output_file, true); - config->nmagic = args.hasFlag(OPT_nmagic, OPT_no_nmagic, false); - config->noinhibitExec = args.hasArg(OPT_noinhibit_exec); - config->nostdlib = args.hasArg(OPT_nostdlib); - config->oFormatBinary = isOutputFormatBinary(args); - config->omagic = args.hasFlag(OPT_omagic, OPT_no_omagic, false); - config->optRemarksFilename = args.getLastArgValue(OPT_opt_remarks_filename); - config->optStatsFilename = args.getLastArgValue(OPT_plugin_opt_stats_file); + ctx.arg.nmagic = args.hasFlag(OPT_nmagic, OPT_no_nmagic, false); + ctx.arg.noinhibitExec = args.hasArg(OPT_noinhibit_exec); + ctx.arg.nostdlib = args.hasArg(OPT_nostdlib); + ctx.arg.oFormatBinary = isOutputFormatBinary(args); + ctx.arg.omagic = args.hasFlag(OPT_omagic, OPT_no_omagic, false); + ctx.arg.optRemarksFilename = args.getLastArgValue(OPT_opt_remarks_filename); + ctx.arg.optStatsFilename = args.getLastArgValue(OPT_plugin_opt_stats_file); // Parse remarks hotness threshold. Valid value is either integer or 'auto'. 
if (auto *arg = args.getLastArg(OPT_opt_remarks_hotness_threshold)) { @@ -1389,77 +1390,77 @@ static void readConfigs(opt::InputArgList &args) { error(arg->getSpelling() + ": invalid argument '" + arg->getValue() + "', only integer or 'auto' is supported"); else - config->optRemarksHotnessThreshold = *resultOrErr; - } - - config->optRemarksPasses = args.getLastArgValue(OPT_opt_remarks_passes); - config->optRemarksWithHotness = args.hasArg(OPT_opt_remarks_with_hotness); - config->optRemarksFormat = args.getLastArgValue(OPT_opt_remarks_format); - config->optimize = args::getInteger(args, OPT_O, 1); - config->orphanHandling = getOrphanHandling(args); - config->outputFile = args.getLastArgValue(OPT_o); - config->packageMetadata = args.getLastArgValue(OPT_package_metadata); - config->pie = args.hasFlag(OPT_pie, OPT_no_pie, false); - config->printIcfSections = + ctx.arg.optRemarksHotnessThreshold = *resultOrErr; + } + + ctx.arg.optRemarksPasses = args.getLastArgValue(OPT_opt_remarks_passes); + ctx.arg.optRemarksWithHotness = args.hasArg(OPT_opt_remarks_with_hotness); + ctx.arg.optRemarksFormat = args.getLastArgValue(OPT_opt_remarks_format); + ctx.arg.optimize = args::getInteger(args, OPT_O, 1); + ctx.arg.orphanHandling = getOrphanHandling(args); + ctx.arg.outputFile = args.getLastArgValue(OPT_o); + ctx.arg.packageMetadata = args.getLastArgValue(OPT_package_metadata); + ctx.arg.pie = args.hasFlag(OPT_pie, OPT_no_pie, false); + ctx.arg.printIcfSections = args.hasFlag(OPT_print_icf_sections, OPT_no_print_icf_sections, false); - config->printGcSections = + ctx.arg.printGcSections = args.hasFlag(OPT_print_gc_sections, OPT_no_print_gc_sections, false); - config->printMemoryUsage = args.hasArg(OPT_print_memory_usage); - config->printArchiveStats = args.getLastArgValue(OPT_print_archive_stats); - config->printSymbolOrder = - args.getLastArgValue(OPT_print_symbol_order); - config->rejectMismatch = !args.hasArg(OPT_no_warn_mismatch); - config->relax = args.hasFlag(OPT_relax, 
OPT_no_relax, true); - config->relaxGP = args.hasFlag(OPT_relax_gp, OPT_no_relax_gp, false); - config->rpath = getRpath(args); - config->relocatable = args.hasArg(OPT_relocatable); - config->resolveGroups = + ctx.arg.printMemoryUsage = args.hasArg(OPT_print_memory_usage); + ctx.arg.printArchiveStats = args.getLastArgValue(OPT_print_archive_stats); + ctx.arg.printSymbolOrder = args.getLastArgValue(OPT_print_symbol_order); + ctx.arg.rejectMismatch = !args.hasArg(OPT_no_warn_mismatch); + ctx.arg.relax = args.hasFlag(OPT_relax, OPT_no_relax, true); + ctx.arg.relaxGP = args.hasFlag(OPT_relax_gp, OPT_no_relax_gp, false); + ctx.arg.rpath = getRpath(args); + ctx.arg.relocatable = args.hasArg(OPT_relocatable); + ctx.arg.resolveGroups = !args.hasArg(OPT_relocatable) || args.hasArg(OPT_force_group_allocation); if (args.hasArg(OPT_save_temps)) { // --save-temps implies saving all temps. for (const char *s : saveTempsValues) - config->saveTempsArgs.insert(s); + ctx.arg.saveTempsArgs.insert(s); } else { for (auto *arg : args.filtered(OPT_save_temps_eq)) { StringRef s = arg->getValue(); if (llvm::is_contained(saveTempsValues, s)) - config->saveTempsArgs.insert(s); + ctx.arg.saveTempsArgs.insert(s); else error("unknown --save-temps value: " + s); } } - config->searchPaths = args::getStrings(args, OPT_library_path); - config->sectionStartMap = getSectionStartMap(args); - config->shared = args.hasArg(OPT_shared); - config->singleRoRx = !args.hasFlag(OPT_rosegment, OPT_no_rosegment, true); - config->soName = args.getLastArgValue(OPT_soname); - config->sortSection = getSortSection(args); - config->splitStackAdjustSize = args::getInteger(args, OPT_split_stack_adjust_size, 16384); - config->zSectionHeader = + ctx.arg.searchPaths = args::getStrings(args, OPT_library_path); + ctx.arg.sectionStartMap = getSectionStartMap(args); + ctx.arg.shared = args.hasArg(OPT_shared); + ctx.arg.singleRoRx = !args.hasFlag(OPT_rosegment, OPT_no_rosegment, true); + ctx.arg.soName = 
args.getLastArgValue(OPT_soname); + ctx.arg.sortSection = getSortSection(args); + ctx.arg.splitStackAdjustSize = + args::getInteger(args, OPT_split_stack_adjust_size, 16384); + ctx.arg.zSectionHeader = getZFlag(args, "sectionheader", "nosectionheader", true); - config->strip = getStrip(args); // needs zSectionHeader - config->sysroot = args.getLastArgValue(OPT_sysroot); - config->target1Rel = args.hasFlag(OPT_target1_rel, OPT_target1_abs, false); - config->target2 = getTarget2(args); - config->thinLTOCacheDir = args.getLastArgValue(OPT_thinlto_cache_dir); - config->thinLTOCachePolicy = CHECK( + ctx.arg.strip = getStrip(args); // needs zSectionHeader + ctx.arg.sysroot = args.getLastArgValue(OPT_sysroot); + ctx.arg.target1Rel = args.hasFlag(OPT_target1_rel, OPT_target1_abs, false); + ctx.arg.target2 = getTarget2(args); + ctx.arg.thinLTOCacheDir = args.getLastArgValue(OPT_thinlto_cache_dir); + ctx.arg.thinLTOCachePolicy = CHECK( parseCachePruningPolicy(args.getLastArgValue(OPT_thinlto_cache_policy)), "--thinlto-cache-policy: invalid cache policy"); - config->thinLTOEmitImportsFiles = args.hasArg(OPT_thinlto_emit_imports_files); - config->thinLTOEmitIndexFiles = args.hasArg(OPT_thinlto_emit_index_files) || + ctx.arg.thinLTOEmitImportsFiles = args.hasArg(OPT_thinlto_emit_imports_files); + ctx.arg.thinLTOEmitIndexFiles = args.hasArg(OPT_thinlto_emit_index_files) || args.hasArg(OPT_thinlto_index_only) || args.hasArg(OPT_thinlto_index_only_eq); - config->thinLTOIndexOnly = args.hasArg(OPT_thinlto_index_only) || + ctx.arg.thinLTOIndexOnly = args.hasArg(OPT_thinlto_index_only) || args.hasArg(OPT_thinlto_index_only_eq); - config->thinLTOIndexOnlyArg = args.getLastArgValue(OPT_thinlto_index_only_eq); - config->thinLTOObjectSuffixReplace = + ctx.arg.thinLTOIndexOnlyArg = args.getLastArgValue(OPT_thinlto_index_only_eq); + ctx.arg.thinLTOObjectSuffixReplace = getOldNewOptions(args, OPT_thinlto_object_suffix_replace_eq); - std::tie(config->thinLTOPrefixReplaceOld, 
config->thinLTOPrefixReplaceNew, - config->thinLTOPrefixReplaceNativeObject) = + std::tie(ctx.arg.thinLTOPrefixReplaceOld, ctx.arg.thinLTOPrefixReplaceNew, + ctx.arg.thinLTOPrefixReplaceNativeObject) = getOldNewOptionsExtra(args, OPT_thinlto_prefix_replace_eq); - if (config->thinLTOEmitIndexFiles && !config->thinLTOIndexOnly) { + if (ctx.arg.thinLTOEmitIndexFiles && !ctx.arg.thinLTOIndexOnly) { if (args.hasArg(OPT_thinlto_object_suffix_replace_eq)) error("--thinlto-object-suffix-replace is not supported with " "--thinlto-emit-index-files"); @@ -1467,69 +1468,69 @@ static void readConfigs(opt::InputArgList &args) { error("--thinlto-prefix-replace is not supported with " "--thinlto-emit-index-files"); } - if (!config->thinLTOPrefixReplaceNativeObject.empty() && - config->thinLTOIndexOnlyArg.empty()) { + if (!ctx.arg.thinLTOPrefixReplaceNativeObject.empty() && + ctx.arg.thinLTOIndexOnlyArg.empty()) { error("--thinlto-prefix-replace=old_dir;new_dir;obj_dir must be used with " "--thinlto-index-only="); } - config->thinLTOModulesToCompile = + ctx.arg.thinLTOModulesToCompile = args::getStrings(args, OPT_thinlto_single_module_eq); - config->timeTraceEnabled = args.hasArg(OPT_time_trace_eq); - config->timeTraceGranularity = + ctx.arg.timeTraceEnabled = args.hasArg(OPT_time_trace_eq); + ctx.arg.timeTraceGranularity = args::getInteger(args, OPT_time_trace_granularity, 500); - config->trace = args.hasArg(OPT_trace); - config->undefined = args::getStrings(args, OPT_undefined); - config->undefinedVersion = + ctx.arg.trace = args.hasArg(OPT_trace); + ctx.arg.undefined = args::getStrings(args, OPT_undefined); + ctx.arg.undefinedVersion = args.hasFlag(OPT_undefined_version, OPT_no_undefined_version, false); - config->unique = args.hasArg(OPT_unique); - config->useAndroidRelrTags = args.hasFlag( + ctx.arg.unique = args.hasArg(OPT_unique); + ctx.arg.useAndroidRelrTags = args.hasFlag( OPT_use_android_relr_tags, OPT_no_use_android_relr_tags, false); - config->warnBackrefs = + 
ctx.arg.warnBackrefs = args.hasFlag(OPT_warn_backrefs, OPT_no_warn_backrefs, false); - config->warnCommon = args.hasFlag(OPT_warn_common, OPT_no_warn_common, false); - config->warnSymbolOrdering = + ctx.arg.warnCommon = args.hasFlag(OPT_warn_common, OPT_no_warn_common, false); + ctx.arg.warnSymbolOrdering = args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true); - config->whyExtract = args.getLastArgValue(OPT_why_extract); - config->zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true); - config->zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true); - config->zForceBti = hasZOption(args, "force-bti"); - config->zForceIbt = hasZOption(args, "force-ibt"); - config->zGcs = getZGcs(args); - config->zGlobal = hasZOption(args, "global"); - config->zGnustack = getZGnuStack(args); - config->zHazardplt = hasZOption(args, "hazardplt"); - config->zIfuncNoplt = hasZOption(args, "ifunc-noplt"); - config->zInitfirst = hasZOption(args, "initfirst"); - config->zInterpose = hasZOption(args, "interpose"); - config->zKeepTextSectionPrefix = getZFlag( + ctx.arg.whyExtract = args.getLastArgValue(OPT_why_extract); + ctx.arg.zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true); + ctx.arg.zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true); + ctx.arg.zForceBti = hasZOption(args, "force-bti"); + ctx.arg.zForceIbt = hasZOption(args, "force-ibt"); + ctx.arg.zGcs = getZGcs(args); + ctx.arg.zGlobal = hasZOption(args, "global"); + ctx.arg.zGnustack = getZGnuStack(args); + ctx.arg.zHazardplt = hasZOption(args, "hazardplt"); + ctx.arg.zIfuncNoplt = hasZOption(args, "ifunc-noplt"); + ctx.arg.zInitfirst = hasZOption(args, "initfirst"); + ctx.arg.zInterpose = hasZOption(args, "interpose"); + ctx.arg.zKeepTextSectionPrefix = getZFlag( args, "keep-text-section-prefix", "nokeep-text-section-prefix", false); - config->zLrodataAfterBss = + ctx.arg.zLrodataAfterBss = getZFlag(args, "lrodata-after-bss", "nolrodata-after-bss", false); - 
config->zNodefaultlib = hasZOption(args, "nodefaultlib"); - config->zNodelete = hasZOption(args, "nodelete"); - config->zNodlopen = hasZOption(args, "nodlopen"); - config->zNow = getZFlag(args, "now", "lazy", false); - config->zOrigin = hasZOption(args, "origin"); - config->zPacPlt = hasZOption(args, "pac-plt"); - config->zRelro = getZFlag(args, "relro", "norelro", true); - config->zRetpolineplt = hasZOption(args, "retpolineplt"); - config->zRodynamic = hasZOption(args, "rodynamic"); - config->zSeparate = getZSeparate(args); - config->zShstk = hasZOption(args, "shstk"); - config->zStackSize = args::getZOptionValue(args, OPT_z, "stack-size", 0); - config->zStartStopGC = + ctx.arg.zNodefaultlib = hasZOption(args, "nodefaultlib"); + ctx.arg.zNodelete = hasZOption(args, "nodelete"); + ctx.arg.zNodlopen = hasZOption(args, "nodlopen"); + ctx.arg.zNow = getZFlag(args, "now", "lazy", false); + ctx.arg.zOrigin = hasZOption(args, "origin"); + ctx.arg.zPacPlt = hasZOption(args, "pac-plt"); + ctx.arg.zRelro = getZFlag(args, "relro", "norelro", true); + ctx.arg.zRetpolineplt = hasZOption(args, "retpolineplt"); + ctx.arg.zRodynamic = hasZOption(args, "rodynamic"); + ctx.arg.zSeparate = getZSeparate(args); + ctx.arg.zShstk = hasZOption(args, "shstk"); + ctx.arg.zStackSize = args::getZOptionValue(args, OPT_z, "stack-size", 0); + ctx.arg.zStartStopGC = getZFlag(args, "start-stop-gc", "nostart-stop-gc", true); - config->zStartStopVisibility = getZStartStopVisibility(args); - config->zText = getZFlag(args, "text", "notext", true); - config->zWxneeded = hasZOption(args, "wxneeded"); - setUnresolvedSymbolPolicy(args); - config->power10Stubs = args.getLastArgValue(OPT_power10_stubs_eq) != "no"; + ctx.arg.zStartStopVisibility = getZStartStopVisibility(args); + ctx.arg.zText = getZFlag(args, "text", "notext", true); + ctx.arg.zWxneeded = hasZOption(args, "wxneeded"); + setUnresolvedSymbolPolicy(ctx, args); + ctx.arg.power10Stubs = args.getLastArgValue(OPT_power10_stubs_eq) != "no"; if 
(opt::Arg *arg = args.getLastArg(OPT_eb, OPT_el)) { if (arg->getOption().matches(OPT_eb)) - config->optEB = true; + ctx.arg.optEB = true; else - config->optEL = true; + ctx.arg.optEL = true; } for (opt::Arg *arg : args.filtered(OPT_remap_inputs)) { @@ -1560,15 +1561,15 @@ static void readConfigs(opt::InputArgList &args) { if (!to_integer(kv.second, v)) error(errPrefix + "expected an integer, but got '" + kv.second + "'"); else if (Expected<GlobPattern> pat = GlobPattern::create(kv.first)) - config->shuffleSections.emplace_back(std::move(*pat), uint32_t(v)); + ctx.arg.shuffleSections.emplace_back(std::move(*pat), uint32_t(v)); else error(errPrefix + toString(pat.takeError()) + ": " + kv.first); } - auto reports = {std::make_pair("bti-report", &config->zBtiReport), - std::make_pair("cet-report", &config->zCetReport), - std::make_pair("gcs-report", &config->zGcsReport), - std::make_pair("pauth-report", &config->zPauthReport)}; + auto reports = {std::make_pair("bti-report", &ctx.arg.zBtiReport), + std::make_pair("cet-report", &ctx.arg.zCetReport), + std::make_pair("gcs-report", &ctx.arg.zGcsReport), + std::make_pair("pauth-report", &ctx.arg.zPauthReport)}; for (opt::Arg *arg : args.filtered(OPT_z)) { std::pair<StringRef, StringRef> option = StringRef(arg->getValue()).split('='); @@ -1603,7 +1604,7 @@ static void readConfigs(opt::InputArgList &args) { levelStr + "'"); } if (Expected<GlobPattern> pat = GlobPattern::create(fields[0])) { - config->compressSections.emplace_back(std::move(*pat), type, level); + ctx.arg.compressSections.emplace_back(std::move(*pat), type, level); } else { error(arg->getSpelling() + ": " + toString(pat.takeError())); continue; @@ -1627,7 +1628,7 @@ static void readConfigs(opt::InputArgList &args) { error(errPrefix + "expected a non-negative integer, but got '" + kv.second + "'"); else if (Expected<GlobPattern> pat = GlobPattern::create(kv.first)) - config->deadRelocInNonAlloc.emplace_back(std::move(*pat), v); + 
ctx.arg.deadRelocInNonAlloc.emplace_back(std::move(*pat), v); else error(errPrefix + toString(pat.takeError()) + ": " + kv.first); } @@ -1653,23 +1654,23 @@ static void readConfigs(opt::InputArgList &args) { "'"); } - config->passPlugins = args::getStrings(args, OPT_load_pass_plugins); + ctx.arg.passPlugins = args::getStrings(args, OPT_load_pass_plugins); // Parse -mllvm options. for (const auto *arg : args.filtered(OPT_mllvm)) { parseClangOption(arg->getValue(), arg->getSpelling()); - config->mllvmOpts.emplace_back(arg->getValue()); + ctx.arg.mllvmOpts.emplace_back(arg->getValue()); } - config->ltoKind = LtoKind::Default; + ctx.arg.ltoKind = LtoKind::Default; if (auto *arg = args.getLastArg(OPT_lto)) { StringRef s = arg->getValue(); if (s == "thin") - config->ltoKind = LtoKind::UnifiedThin; + ctx.arg.ltoKind = LtoKind::UnifiedThin; else if (s == "full") - config->ltoKind = LtoKind::UnifiedRegular; + ctx.arg.ltoKind = LtoKind::UnifiedRegular; else if (s == "default") - config->ltoKind = LtoKind::Default; + ctx.arg.ltoKind = LtoKind::Default; else error("unknown LTO mode: " + s); } @@ -1685,21 +1686,21 @@ static void readConfigs(opt::InputArgList &args) { error(arg->getSpelling() + ": expected a positive integer, but got '" + arg->getValue() + "'"); parallel::strategy = hardware_concurrency(threads); - config->thinLTOJobs = v; + ctx.arg.thinLTOJobs = v; } else if (parallel::strategy.compute_thread_count() > 16) { log("set maximum concurrency to 16, specify --threads= to change"); parallel::strategy = hardware_concurrency(16); } if (auto *arg = args.getLastArg(OPT_thinlto_jobs_eq)) - config->thinLTOJobs = arg->getValue(); - config->threadCount = parallel::strategy.compute_thread_count(); + ctx.arg.thinLTOJobs = arg->getValue(); + ctx.arg.threadCount = parallel::strategy.compute_thread_count(); - if (config->ltoPartitions == 0) + if (ctx.arg.ltoPartitions == 0) error("--lto-partitions: number of threads must be > 0"); - if 
(!get_threadpool_strategy(config->thinLTOJobs)) - error("--thinlto-jobs: invalid job count: " + config->thinLTOJobs); + if (!get_threadpool_strategy(ctx.arg.thinLTOJobs)) + error("--thinlto-jobs: invalid job count: " + ctx.arg.thinLTOJobs); - if (config->splitStackAdjustSize < 0) + if (ctx.arg.splitStackAdjustSize < 0) error("--split-stack-adjust-size: size must be >= 0"); // The text segment is traditionally the first segment, whose address equals @@ -1713,42 +1714,42 @@ static void readConfigs(opt::InputArgList &args) { // Parse ELF{32,64}{LE,BE} and CPU type. if (auto *arg = args.getLastArg(OPT_m)) { StringRef s = arg->getValue(); - std::tie(config->ekind, config->emachine, config->osabi) = + std::tie(ctx.arg.ekind, ctx.arg.emachine, ctx.arg.osabi) = parseEmulation(s); - config->mipsN32Abi = + ctx.arg.mipsN32Abi = (s.starts_with("elf32btsmipn32") || s.starts_with("elf32ltsmipn32")); - config->emulation = s; + ctx.arg.emulation = s; } // Parse --hash-style={sysv,gnu,both}. if (auto *arg = args.getLastArg(OPT_hash_style)) { StringRef s = arg->getValue(); if (s == "sysv") - config->sysvHash = true; + ctx.arg.sysvHash = true; else if (s == "gnu") - config->gnuHash = true; + ctx.arg.gnuHash = true; else if (s == "both") - config->sysvHash = config->gnuHash = true; + ctx.arg.sysvHash = ctx.arg.gnuHash = true; else error("unknown --hash-style: " + s); } if (args.hasArg(OPT_print_map)) - config->mapFile = "-"; + ctx.arg.mapFile = "-"; // Page alignment can be disabled by the -n (--nmagic) and -N (--omagic). // As PT_GNU_RELRO relies on Paging, do not create it when we have disabled // it. Also disable RELRO for -r. 
- if (config->nmagic || config->omagic || config->relocatable) - config->zRelro = false; + if (ctx.arg.nmagic || ctx.arg.omagic || ctx.arg.relocatable) + ctx.arg.zRelro = false; - std::tie(config->buildId, config->buildIdVector) = getBuildId(args); + std::tie(ctx.arg.buildId, ctx.arg.buildIdVector) = getBuildId(args); if (getZFlag(args, "pack-relative-relocs", "nopack-relative-relocs", false)) { - config->relrGlibc = true; - config->relrPackDynRelocs = true; + ctx.arg.relrGlibc = true; + ctx.arg.relrPackDynRelocs = true; } else { - std::tie(config->androidPackDynRelocs, config->relrPackDynRelocs) = + std::tie(ctx.arg.androidPackDynRelocs, ctx.arg.relrPackDynRelocs) = getPackDynRelocs(args); } @@ -1757,34 +1758,34 @@ static void readConfigs(opt::InputArgList &args) { error("--symbol-ordering-file and --call-graph-order-file " "may not be used together"); if (std::optional<MemoryBufferRef> buffer = readFile(arg->getValue())) { - config->symbolOrderingFile = getSymbolOrderingFile(*buffer); + ctx.arg.symbolOrderingFile = getSymbolOrderingFile(*buffer); // Also need to disable CallGraphProfileSort to prevent // LLD order symbols with CGProfile - config->callGraphProfileSort = CGProfileSortKind::None; + ctx.arg.callGraphProfileSort = CGProfileSortKind::None; } } - assert(config->versionDefinitions.empty()); - config->versionDefinitions.push_back( + assert(ctx.arg.versionDefinitions.empty()); + ctx.arg.versionDefinitions.push_back( {"local", (uint16_t)VER_NDX_LOCAL, {}, {}}); - config->versionDefinitions.push_back( + ctx.arg.versionDefinitions.push_back( {"global", (uint16_t)VER_NDX_GLOBAL, {}, {}}); // If --retain-symbol-file is used, we'll keep only the symbols listed in // the file and discard all others. 
if (auto *arg = args.getLastArg(OPT_retain_symbols_file)) { - config->versionDefinitions[VER_NDX_LOCAL].nonLocalPatterns.push_back( + ctx.arg.versionDefinitions[VER_NDX_LOCAL].nonLocalPatterns.push_back( {"*", /*isExternCpp=*/false, /*hasWildcard=*/true}); if (std::optional<MemoryBufferRef> buffer = readFile(arg->getValue())) for (StringRef s : args::getLines(*buffer)) - config->versionDefinitions[VER_NDX_GLOBAL].nonLocalPatterns.push_back( + ctx.arg.versionDefinitions[VER_NDX_GLOBAL].nonLocalPatterns.push_back( {s, /*isExternCpp=*/false, /*hasWildcard=*/false}); } for (opt::Arg *arg : args.filtered(OPT_warn_backrefs_exclude)) { StringRef pattern(arg->getValue()); if (Expected<GlobPattern> pat = GlobPattern::create(pattern)) - config->warnBackrefsExclude.push_back(std::move(*pat)); + ctx.arg.warnBackrefsExclude.push_back(std::move(*pat)); else error(arg->getSpelling() + ": " + toString(pat.takeError()) + ": " + pattern); @@ -1796,15 +1797,15 @@ static void readConfigs(opt::InputArgList &args) { // even if other options express a symbolic intention: -Bsymbolic, // -Bsymbolic-functions (if STT_FUNC), --dynamic-list. for (auto *arg : args.filtered(OPT_export_dynamic_symbol)) - config->dynamicList.push_back( + ctx.arg.dynamicList.push_back( {arg->getValue(), /*isExternCpp=*/false, /*hasWildcard=*/hasWildcard(arg->getValue())}); // --export-dynamic-symbol-list specifies a list of --export-dynamic-symbol // patterns. --dynamic-list is --export-dynamic-symbol-list plus -Bsymbolic // like semantics. - config->symbolic = - config->bsymbolic == BsymbolicKind::All || args.hasArg(OPT_dynamic_list); + ctx.arg.symbolic = + ctx.arg.bsymbolic == BsymbolicKind::All || args.hasArg(OPT_dynamic_list); for (auto *arg : args.filtered(OPT_dynamic_list, OPT_export_dynamic_symbol_list)) if (std::optional<MemoryBufferRef> buffer = readFile(arg->getValue())) @@ -2067,14 +2068,14 @@ void LinkerDriver::inferMachineType() { // Parse -z max-page-size=<value>. 
The default value is defined by // each target. -static uint64_t getMaxPageSize(opt::InputArgList &args) { +static uint64_t getMaxPageSize(Ctx &ctx, opt::InputArgList &args) { uint64_t val = args::getZOptionValue(args, OPT_z, "max-page-size", ctx.target->defaultMaxPageSize); if (!isPowerOf2_64(val)) { error("max-page-size: value isn't a power of 2"); return ctx.target->defaultMaxPageSize; } - if (config->nmagic || config->omagic) { + if (ctx.arg.nmagic || ctx.arg.omagic) { if (val != ctx.target->defaultMaxPageSize) warn("-z max-page-size set, but paging disabled by omagic or nmagic"); return 1; @@ -2084,7 +2085,7 @@ static uint64_t getMaxPageSize(opt::InputArgList &args) { // Parse -z common-page-size=<value>. The default value is defined by // each target. -static uint64_t getCommonPageSize(opt::InputArgList &args) { +static uint64_t getCommonPageSize(Ctx &ctx, opt::InputArgList &args) { uint64_t val = args::getZOptionValue(args, OPT_z, "common-page-size", ctx.target->defaultCommonPageSize); if (!isPowerOf2_64(val)) { @@ -2103,7 +2104,7 @@ static uint64_t getCommonPageSize(opt::InputArgList &args) { } // Parses --image-base option. -static std::optional<uint64_t> getImageBase(opt::InputArgList &args) { +static std::optional<uint64_t> getImageBase(Ctx &ctx, opt::InputArgList &args) { // Because we are using "Config->maxPageSize" here, this function has to be // called after the variable is initialized. auto *arg = args.getLastArg(OPT_image_base); @@ -2116,7 +2117,7 @@ static std::optional<uint64_t> getImageBase(opt::InputArgList &args) { error("--image-base: number expected, but got " + s); return 0; } - if ((v % config->maxPageSize) != 0) + if ((v % ctx.arg.maxPageSize) != 0) warn("--image-base: address isn't multiple of page size: " + s); return v; } @@ -2169,7 +2170,7 @@ static void excludeLibs(opt::InputArgList &args) { } // Force Sym to be entered in the output. 
-static void handleUndefined(Symbol *sym, const char *option) { +static void handleUndefined(Ctx &ctx, Symbol *sym, const char *option) { // Since a symbol may not be used inside the program, LTO may // eliminate it. Mark the symbol as "used" to prevent it. sym->isUsedInRegularObj = true; @@ -2177,14 +2178,14 @@ static void handleUndefined(Symbol *sym, const char *option) { if (!sym->isLazy()) return; sym->extract(); - if (!config->whyExtract.empty()) + if (!ctx.arg.whyExtract.empty()) ctx.whyExtractRecords.emplace_back(option, sym->file, *sym); } // As an extension to GNU linkers, lld supports a variant of `-u` // which accepts wildcard patterns. All symbols that match a given // pattern are handled as if they were given by `-u`. -static void handleUndefinedGlob(StringRef arg) { +static void handleUndefinedGlob(Ctx &ctx, StringRef arg) { Expected<GlobPattern> pat = GlobPattern::create(arg); if (!pat) { error("--undefined-glob: " + toString(pat.takeError()) + ": " + arg); @@ -2199,26 +2200,26 @@ static void handleUndefinedGlob(StringRef arg) { syms.push_back(sym); for (Symbol *sym : syms) - handleUndefined(sym, "--undefined-glob"); + handleUndefined(ctx, sym, "--undefined-glob"); } -static void handleLibcall(StringRef name) { +static void handleLibcall(Ctx &ctx, StringRef name) { Symbol *sym = symtab.find(name); if (sym && sym->isLazy() && isa<BitcodeFile>(sym->file)) { - if (!config->whyExtract.empty()) + if (!ctx.arg.whyExtract.empty()) ctx.whyExtractRecords.emplace_back("<libcall>", sym->file, *sym); sym->extract(); } } -static void writeArchiveStats() { - if (config->printArchiveStats.empty()) +static void writeArchiveStats(Ctx &ctx) { + if (ctx.arg.printArchiveStats.empty()) return; std::error_code ec; - raw_fd_ostream os = ctx.openAuxiliaryFile(config->printArchiveStats, ec); + raw_fd_ostream os = ctx.openAuxiliaryFile(ctx.arg.printArchiveStats, ec); if (ec) { - error("--print-archive-stats=: cannot open " + config->printArchiveStats + + 
error("--print-archive-stats=: cannot open " + ctx.arg.printArchiveStats + ": " + ec.message()); return; } @@ -2241,14 +2242,14 @@ static void writeArchiveStats() { } } -static void writeWhyExtract() { - if (config->whyExtract.empty()) +static void writeWhyExtract(Ctx &ctx) { + if (ctx.arg.whyExtract.empty()) return; std::error_code ec; - raw_fd_ostream os = ctx.openAuxiliaryFile(config->whyExtract, ec); + raw_fd_ostream os = ctx.openAuxiliaryFile(ctx.arg.whyExtract, ec); if (ec) { - error("cannot open --why-extract= file " + config->whyExtract + ": " + + error("cannot open --why-extract= file " + ctx.arg.whyExtract + ": " + ec.message()); return; } @@ -2260,7 +2261,7 @@ static void writeWhyExtract() { } } -static void reportBackrefs() { +static void reportBackrefs(Ctx &ctx) { for (auto &ref : ctx.backwardReferences) { const Symbol &sym = *ref.first; std::string to = toString(ref.second.second); @@ -2268,7 +2269,7 @@ static void reportBackrefs() { // with --warn-backrefs-exclude=. The value may look like (for --start-lib) // *.o or (archive member) *.a(*.o). bool exclude = false; - for (const llvm::GlobPattern &pat : config->warnBackrefsExclude) + for (const llvm::GlobPattern &pat : ctx.arg.warnBackrefsExclude) if (pat.match(to)) { exclude = true; break; @@ -2303,11 +2304,11 @@ static void reportBackrefs() { // part of your program. By using --dependency-file option, you can make // lld to dump dependency info so that you can maintain exact dependencies // easily. 
-static void writeDependencyFile() { +static void writeDependencyFile(Ctx &ctx) { std::error_code ec; - raw_fd_ostream os = ctx.openAuxiliaryFile(config->dependencyFile, ec); + raw_fd_ostream os = ctx.openAuxiliaryFile(ctx.arg.dependencyFile, ec); if (ec) { - error("cannot open " + config->dependencyFile + ": " + ec.message()); + error("cannot open " + ctx.arg.dependencyFile + ": " + ec.message()); return; } @@ -2334,14 +2335,14 @@ static void writeDependencyFile() { } }; - os << config->outputFile << ":"; - for (StringRef path : config->dependencyFiles) { + os << ctx.arg.outputFile << ":"; + for (StringRef path : ctx.arg.dependencyFiles) { os << " \\\n "; printFilename(os, path); } os << "\n"; - for (StringRef path : config->dependencyFiles) { + for (StringRef path : ctx.arg.dependencyFiles) { os << "\n"; printFilename(os, path); os << ":\n"; @@ -2352,7 +2353,7 @@ static void writeDependencyFile() { // This function is called after all symbol names are resolved. As a // result, the passes after the symbol resolution won't see any // symbols of type CommonSymbol. -static void replaceCommonSymbols() { +static void replaceCommonSymbols(Ctx &ctx) { llvm::TimeTraceScope timeScope("Replace common symbols"); for (ELFFileBase *file : ctx.objectFiles) { if (!file->hasCommonSyms) @@ -2387,7 +2388,7 @@ static void markAddrsig(Symbol *s) { // and symbols referred to by address-significance tables. These sections are // ineligible for ICF. template <class ELFT> -static void findKeepUniqueSections(opt::InputArgList &args) { +static void findKeepUniqueSections(Ctx &ctx, opt::InputArgList &args) { for (auto *arg : args.filtered(OPT_keep_unique)) { StringRef name = arg->getValue(); auto *d = dyn_cast_or_null<Defined>(symtab.find(name)); @@ -2400,7 +2401,7 @@ static void findKeepUniqueSections(opt::InputArgList &args) { // --icf=all --ignore-data-address-equality means that we can ignore // the dynsym and address-significance tables entirely. 
- if (config->icf == ICFLevel::All && config->ignoreDataAddressEquality) + if (ctx.arg.icf == ICFLevel::All && ctx.arg.ignoreDataAddressEquality) return; // Symbols in the dynsym could be address-significant in other executables @@ -2669,7 +2670,7 @@ static void combineVersionedSymbol(Symbol &sym, // When this function is executed, only InputFiles and symbol table // contain pointers to symbol objects. We visit them to replace pointers, // so that wrapped symbols are swapped as instructed by the command line. -static void redirectSymbols(ArrayRef<WrappedSymbol> wrapped) { +static void redirectSymbols(Ctx &ctx, ArrayRef<WrappedSymbol> wrapped) { llvm::TimeTraceScope timeScope("Redirect symbols"); DenseMap<Symbol *, Symbol *> map; for (const WrappedSymbol &w : wrapped) { @@ -2680,7 +2681,7 @@ static void redirectSymbols(ArrayRef<WrappedSymbol> wrapped) { // If there are version definitions (versionDefinitions.size() > 2), enumerate // symbols with a non-default version (foo@v1) and check whether it should be // combined with foo or foo@@v1. - if (config->versionDefinitions.size() > 2) + if (ctx.arg.versionDefinitions.size() > 2) for (Symbol *sym : symtab.getSymbols()) if (sym->hasVersionSuffix) combineVersionedSymbol(*sym, map); @@ -2877,11 +2878,11 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // If an entry symbol is in a static archive, pull out that file now. if (Symbol *sym = symtab.find(ctx.arg.entry)) - handleUndefined(sym, "--entry"); + handleUndefined(ctx, sym, "--entry"); // Handle the `--undefined-glob <pattern>` options. 
for (StringRef pat : args::getStrings(args, OPT_undefined_glob)) - handleUndefinedGlob(pat); + handleUndefinedGlob(ctx, pat); // After potential archive member extraction involving ENTRY and // -u/--undefined-glob, check whether PROVIDE symbols should be defined (the @@ -2919,7 +2920,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { if (!ctx.bitcodeFiles.empty()) { llvm::Triple TT(ctx.bitcodeFiles.front()->obj->getTargetTriple()); for (auto *s : lto::LTO::getRuntimeLibcallSymbols(TT)) - handleLibcall(s); + handleLibcall(ctx, s); } // Archive members defining __wrap symbols may be extracted. @@ -3006,9 +3007,9 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // Symbol resolution finished. Report backward reference problems, // --print-archive-stats=, and --why-extract=. - reportBackrefs(); - writeArchiveStats(); - writeWhyExtract(); + reportBackrefs(ctx); + writeArchiveStats(ctx); + writeWhyExtract(ctx); if (errorCount()) return; @@ -3050,10 +3051,10 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { processArmCmseSymbols(); // Apply symbol renames for --wrap and combine foo@v1 and foo@@v1. - redirectSymbols(wrapped); + redirectSymbols(ctx, wrapped); // Replace common symbols with regular symbols. - replaceCommonSymbols(); + replaceCommonSymbols(ctx); { llvm::TimeTraceScope timeScope("Aggregate sections"); @@ -3104,7 +3105,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // Since we now have a complete set of input files, we can create // a .d file to record build dependencies. if (!ctx.arg.dependencyFile.empty()) - writeDependencyFile(); + writeDependencyFile(ctx); // Now that the number of partitions is fixed, save a pointer to the main // partition. @@ -3124,15 +3125,15 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // the output can be run on. 
For example if the OS can use 4k or 64k page // sizes then maxPageSize must be 64k for the output to be useable on both. // All important alignment decisions must use this value. - ctx.arg.maxPageSize = getMaxPageSize(args); + ctx.arg.maxPageSize = getMaxPageSize(ctx, args); // commonPageSize is the most common page size that the output will be run on. // For example if an OS can use 4k or 64k page sizes and 4k is more common // than 64k then commonPageSize is set to 4k. commonPageSize can be used for // optimizations such as DATA_SEGMENT_ALIGN in linker scripts. LLD's use of it // is limited to writing trap instructions on the last executable segment. - ctx.arg.commonPageSize = getCommonPageSize(args); + ctx.arg.commonPageSize = getCommonPageSize(ctx, args); - ctx.arg.imageBase = getImageBase(args); + ctx.arg.imageBase = getImageBase(ctx, args); // This adds a .comment section containing a version string. if (!ctx.arg.relocatable) @@ -3195,7 +3196,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // Two input sections with different output sections should not be folded. // ICF runs after processSectionCommands() so that we know the output sections. 
if (ctx.arg.icf != ICFLevel::None) { - findKeepUniqueSections<ELFT>(args); + findKeepUniqueSections<ELFT>(ctx, args); doIcf<ELFT>(); } diff --git a/lld/test/COFF/import_weak_alias.test b/lld/test/COFF/import_weak_alias.test new file mode 100644 index 000000000000..ae1817c67a20 --- /dev/null +++ b/lld/test/COFF/import_weak_alias.test @@ -0,0 +1,20 @@ +# REQUIRES: x86 + +# RUN: split-file %s %t.dir +# RUN: llvm-mc --filetype=obj -triple=x86_64-windows-msvc %t.dir/foo.s -o %t.foo.obj +# RUN: llvm-mc --filetype=obj -triple=x86_64-windows-msvc %t.dir/qux.s -o %t.qux.obj +# RUN: lld-link %t.qux.obj %t.foo.obj -out:%t.dll -dll +# +#--- foo.s +.text +bar: + ret + +.weak foo +.set foo, bar +#--- qux.s +.text +.global _DllMainCRTStartup +_DllMainCRTStartup: + call *__imp_foo(%rip) + ret diff --git a/lld/test/wasm/unsupported-pic-relocations.s b/lld/test/wasm/unsupported-pic-relocations.s new file mode 100644 index 000000000000..ea32e8468cdb --- /dev/null +++ b/lld/test/wasm/unsupported-pic-relocations.s @@ -0,0 +1,39 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s + +# RUN: not wasm-ld --experimental-pic -shared %t.o -o /dev/null 2>&1 | \ +# RUN: FileCheck %s + +# RUN: not wasm-ld --experimental-pic -shared %t.o -o /dev/null --unresolved-symbols=report-all 2>&1 | \ +# RUN: FileCheck %s + +# RUN: not wasm-ld --experimental-pic -shared %t.o -o /dev/null --warn-unresolved-symbols 2>&1 | \ +# RUN: FileCheck %s + +# RUN: not wasm-ld --experimental-pic -shared %t.o -o /dev/null --unresolved-symbols=ignore-all 2>&1 | \ +# RUN: FileCheck %s + +# RUN: not wasm-ld --experimental-pic -shared %t.o -o /dev/null --unresolved-symbols=import-dynamic 2>&1 | \ +# RUN: FileCheck %s + +.functype external_func () -> () + +use_undefined_function: + .functype use_undefined_function () -> () + i32.const external_func@TBREL + # CHECK: error: {{.*}}.o: relocation R_WASM_TABLE_INDEX_REL_SLEB is not supported against an undefined symbol `external_func` + drop + end_function + 
+use_undefined_data: + .functype use_undefined_data () -> () + i32.const external_data@MBREL + # CHECK: error: {{.*}}.o: relocation R_WASM_MEMORY_ADDR_REL_SLEB is not supported against an undefined symbol `external_data` + drop + end_function + +.globl _start +_start: + .functype _start () -> () + call use_undefined_function + call use_undefined_data + end_function diff --git a/lld/test/wasm/unsupported-pic-relocations64.s b/lld/test/wasm/unsupported-pic-relocations64.s new file mode 100644 index 000000000000..db9707b7fbac --- /dev/null +++ b/lld/test/wasm/unsupported-pic-relocations64.s @@ -0,0 +1,39 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm64-unknown-unknown -o %t.o %s + +# RUN: not wasm-ld -mwasm64 --experimental-pic -shared %t.o -o /dev/null 2>&1 | \ +# RUN: FileCheck %s + +# RUN: not wasm-ld -mwasm64 --experimental-pic -shared %t.o -o /dev/null --unresolved-symbols=report-all 2>&1 | \ +# RUN: FileCheck %s + +# RUN: not wasm-ld -mwasm64 --experimental-pic -shared %t.o -o /dev/null --warn-unresolved-symbols 2>&1 | \ +# RUN: FileCheck %s + +# RUN: not wasm-ld -mwasm64 --experimental-pic -shared %t.o -o /dev/null --unresolved-symbols=ignore-all 2>&1 | \ +# RUN: FileCheck %s + +# RUN: not wasm-ld -mwasm64 --experimental-pic -shared %t.o -o /dev/null --unresolved-symbols=import-dynamic 2>&1 | \ +# RUN: FileCheck %s + +.functype external_func () -> () + +use_undefined_function: + .functype use_undefined_function () -> () + i64.const external_func@TBREL + # CHECK: error: {{.*}}.o: relocation R_WASM_TABLE_INDEX_REL_SLEB64 is not supported against an undefined symbol `external_func` + drop + end_function + +use_undefined_data: + .functype use_undefined_data () -> () + i64.const external_data@MBREL + # CHECK: error: {{.*}}.o: relocation R_WASM_MEMORY_ADDR_REL_SLEB64 is not supported against an undefined symbol `external_data` + drop + end_function + +.globl _start +_start: + .functype _start () -> () + call use_undefined_function + call use_undefined_data + 
end_function diff --git a/lld/wasm/Relocations.cpp b/lld/wasm/Relocations.cpp index 6f33a4f28a9d..2dbfe3354947 100644 --- a/lld/wasm/Relocations.cpp +++ b/lld/wasm/Relocations.cpp @@ -173,6 +173,22 @@ void scanRelocations(InputChunk *chunk) { } } + if (sym->isUndefined()) { + switch (reloc.Type) { + case R_WASM_TABLE_INDEX_REL_SLEB: + case R_WASM_TABLE_INDEX_REL_SLEB64: + case R_WASM_MEMORY_ADDR_REL_SLEB: + case R_WASM_MEMORY_ADDR_REL_SLEB64: + // These relocation types are for symbols that exist relative to + // `__memory_base` or `__table_base` and as such only make sense for + // defined symbols. + error(toString(file) + ": relocation " + relocTypeToString(reloc.Type) + + " is not supported against an undefined symbol `" + + toString(*sym) + "`"); + break; + } + } + if (sym->isUndefined() && !config->relocatable && !sym->isWeak()) { // Report undefined symbols reportUndefined(file, sym); diff --git a/lldb/include/lldb/Utility/Scalar.h b/lldb/include/lldb/Utility/Scalar.h index 0d8eba3c9726..b4b9c7e18958 100644 --- a/lldb/include/lldb/Utility/Scalar.h +++ b/lldb/include/lldb/Utility/Scalar.h @@ -210,6 +210,7 @@ protected: static PromotionKey GetFloatPromoKey(const llvm::fltSemantics &semantics); private: + friend llvm::APFloat::cmpResult compare(Scalar lhs, Scalar rhs); friend const Scalar operator+(const Scalar &lhs, const Scalar &rhs); friend const Scalar operator-(Scalar lhs, Scalar rhs); friend const Scalar operator/(Scalar lhs, Scalar rhs); @@ -220,9 +221,9 @@ private: friend const Scalar operator^(Scalar lhs, Scalar rhs); friend const Scalar operator<<(const Scalar &lhs, const Scalar &rhs); friend const Scalar operator>>(const Scalar &lhs, const Scalar &rhs); - friend bool operator==(Scalar lhs, Scalar rhs); + friend bool operator==(const Scalar &lhs, const Scalar &rhs); friend bool operator!=(const Scalar &lhs, const Scalar &rhs); - friend bool operator<(Scalar lhs, Scalar rhs); + friend bool operator<(const Scalar &lhs, const Scalar &rhs); friend bool
operator<=(const Scalar &lhs, const Scalar &rhs); friend bool operator>(const Scalar &lhs, const Scalar &rhs); friend bool operator>=(const Scalar &lhs, const Scalar &rhs); @@ -241,6 +242,7 @@ private: // Item 19 of "Effective C++ Second Edition" by Scott Meyers // Differentiate among members functions, non-member functions, and // friend functions +llvm::APFloat::cmpResult compare(Scalar lhs, Scalar rhs); const Scalar operator+(const Scalar &lhs, const Scalar &rhs); const Scalar operator-(Scalar lhs, Scalar rhs); const Scalar operator/(Scalar lhs, Scalar rhs); @@ -251,9 +253,9 @@ const Scalar operator%(Scalar lhs, Scalar rhs); const Scalar operator^(Scalar lhs, Scalar rhs); const Scalar operator<<(const Scalar &lhs, const Scalar &rhs); const Scalar operator>>(const Scalar &lhs, const Scalar &rhs); -bool operator==(Scalar lhs, Scalar rhs); +bool operator==(const Scalar &lhs, const Scalar &rhs); bool operator!=(const Scalar &lhs, const Scalar &rhs); -bool operator<(Scalar lhs, Scalar rhs); +bool operator<(const Scalar &lhs, const Scalar &rhs); bool operator<=(const Scalar &lhs, const Scalar &rhs); bool operator>(const Scalar &lhs, const Scalar &rhs); bool operator>=(const Scalar &lhs, const Scalar &rhs); diff --git a/lldb/include/lldb/Utility/Status.h b/lldb/include/lldb/Utility/Status.h index 795c830b9651..4a09c38ce62f 100644 --- a/lldb/include/lldb/Utility/Status.h +++ b/lldb/include/lldb/Utility/Status.h @@ -28,6 +28,69 @@ namespace lldb_private { const char *ExpressionResultAsCString(lldb::ExpressionResults result); +/// Going a bit against the spirit of llvm::Error, +/// lldb_private::Status needs to store errors long-term and sometimes +/// copy them. This base class defines an interface for this +/// operation.
+class CloneableError + : public llvm::ErrorInfo<CloneableError, llvm::ErrorInfoBase> { +public: + using llvm::ErrorInfo<CloneableError, llvm::ErrorInfoBase>::ErrorInfo; + CloneableError() : ErrorInfo() {} + virtual std::unique_ptr<CloneableError> Clone() const = 0; + static char ID; +}; + +/// Common base class for all error-code errors. +class CloneableECError + : public llvm::ErrorInfo<CloneableECError, CloneableError> { +public: + using llvm::ErrorInfo<CloneableECError, CloneableError>::ErrorInfo; + CloneableECError() = delete; + CloneableECError(std::error_code ec) : ErrorInfo(), EC(ec) {} + std::error_code convertToErrorCode() const override { return EC; } + void log(llvm::raw_ostream &OS) const override { OS << EC.message(); } + std::unique_ptr<CloneableError> Clone() const override; + static char ID; + +protected: + std::error_code EC; +}; + +/// FIXME: Move these declarations closer to where they're used. +class MachKernelError + : public llvm::ErrorInfo<MachKernelError, CloneableECError> { +public: + using llvm::ErrorInfo<MachKernelError, CloneableECError>::ErrorInfo; + MachKernelError(std::error_code ec) : ErrorInfo(ec) {} + std::string message() const override; + std::unique_ptr<CloneableError> Clone() const override; + static char ID; +}; + +class Win32Error : public llvm::ErrorInfo<Win32Error, CloneableECError> { +public: + using llvm::ErrorInfo<Win32Error, CloneableECError>::ErrorInfo; + Win32Error(std::error_code ec, const llvm::Twine &msg = {}) : ErrorInfo(ec) {} + std::string message() const override; + std::unique_ptr<CloneableError> Clone() const override; + static char ID; +}; + +class ExpressionError + : public llvm::ErrorInfo<ExpressionError, CloneableECError> { +public: + using llvm::ErrorInfo<ExpressionError, CloneableECError>::ErrorInfo; + ExpressionError(std::error_code ec, std::string msg = {}) + : ErrorInfo(ec), m_string(msg) {} + std::unique_ptr<CloneableError> Clone() const override; + std::string message() const override { return 
m_string; } + static char ID; + +protected: + std::string m_string; +}; + /// \class Status Status.h "lldb/Utility/Status.h" An error handling class. /// /// This class is designed to be able to hold any error code that can be @@ -100,9 +163,7 @@ public: } static Status FromExpressionError(lldb::ExpressionResults result, - std::string msg) { - return Status(result, lldb::eErrorTypeExpression, msg); - } + std::string msg); /// Set the current error to errno. /// @@ -115,6 +176,7 @@ public: const Status &operator=(Status &&); /// Avoid using this in new code. Migrate APIs to llvm::Expected instead. static Status FromError(llvm::Error error); + /// FIXME: Replace this with a takeError() method. llvm::Error ToError() const; /// Don't call this function in new code. Instead, redesign the API @@ -149,12 +211,20 @@ public: /// Access the error value. /// + /// If the internally stored \ref llvm::Error is an \ref + /// llvm::ErrorList then this returns the error value of the first + /// error. + /// /// \return /// The error value. ValueType GetError() const; /// Access the error type. /// + /// If the internally stored \ref llvm::Error is an \ref + /// llvm::ErrorList then this returns the error type of the first + /// error. + /// /// \return /// The error type enumeration value. lldb::ErrorType GetType() const; @@ -170,12 +240,9 @@ public: bool Success() const; protected: - Status(llvm::Error error); - /// Status code as an integer value. - ValueType m_code = 0; - /// The type of the above error code. - lldb::ErrorType m_type = lldb::eErrorTypeInvalid; - /// A string representation of the error code. + Status(llvm::Error error) : m_error(std::move(error)) {} + llvm::Error m_error; + /// TODO: Replace this with just calling toString(m_error).
mutable std::string m_string; }; diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp index 9b72cb003528..d4aa90b2c773 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp @@ -103,13 +103,14 @@ GDBRemoteCommunicationServer::SendErrorResponse(uint8_t err) { GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServer::SendErrorResponse(const Status &error) { + uint8_t code = error.GetType() == eErrorTypePOSIX ? error.GetError() : 0xff; if (m_send_error_strings) { lldb_private::StreamString packet; - packet.Printf("E%2.2x;", static_cast<uint8_t>(error.GetError())); + packet.Printf("E%2.2x;", code); packet.PutStringAsRawHex8(error.AsCString()); return SendPacketNoLock(packet.GetString()); - } else - return SendErrorResponse(error.GetError()); + } + return SendErrorResponse(code); } GDBRemoteCommunication::PacketResult diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp index 24cf34300063..6ddd00df3a21 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp @@ -993,8 +993,8 @@ void PythonException::Restore() { } PythonException::~PythonException() { - Py_XDECREF(m_exception_type); Py_XDECREF(m_exception); + Py_XDECREF(m_exception_type); Py_XDECREF(m_traceback); Py_XDECREF(m_repr_bytes); } @@ -1108,9 +1108,10 @@ public: py_error = Status::FromError(r.takeError()); } base_error = Base::Close(); + // Cloning since the wrapped exception may still reference the PyThread. 
if (py_error.Fail()) - return py_error; - return base_error; + return py_error.Clone(); + return base_error.Clone(); }; PyObject *GetPythonObject() const { @@ -1196,7 +1197,8 @@ public: return Flush(); auto r = m_py_obj.CallMethod("close"); if (!r) - return Status::FromError(r.takeError()); + // Cloning since the wrapped exception may still reference the PyThread. + return Status::FromError(r.takeError()).Clone(); return Status(); } @@ -1204,7 +1206,8 @@ public: GIL takeGIL; auto r = m_py_obj.CallMethod("flush"); if (!r) - return Status::FromError(r.takeError()); + // Cloning since the wrapped exception may still reference the PyThread. + return Status::FromError(r.takeError()).Clone(); return Status(); } @@ -1240,7 +1243,8 @@ public: PyObject *pybuffer_p = PyMemoryView_FromMemory( const_cast<char *>((const char *)buf), num_bytes, PyBUF_READ); if (!pybuffer_p) - return Status::FromError(llvm::make_error<PythonException>()); + // Cloning since the wrapped exception may still reference the PyThread. + return Status::FromError(llvm::make_error<PythonException>()).Clone(); auto pybuffer = Take<PythonObject>(pybuffer_p); num_bytes = 0; auto bytes_written = As<long long>(m_py_obj.CallMethod("write", pybuffer)); @@ -1260,7 +1264,8 @@ public: auto pybuffer_obj = m_py_obj.CallMethod("read", (unsigned long long)num_bytes); if (!pybuffer_obj) - return Status::FromError(pybuffer_obj.takeError()); + // Cloning since the wrapped exception may still reference the PyThread. + return Status::FromError(pybuffer_obj.takeError()).Clone(); num_bytes = 0; if (pybuffer_obj.get().IsNone()) { // EOF @@ -1269,7 +1274,8 @@ public: } auto pybuffer = PythonBuffer::Create(pybuffer_obj.get()); if (!pybuffer) - return Status::FromError(pybuffer.takeError()); + // Cloning since the wrapped exception may still reference the PyThread. 
+ return Status::FromError(pybuffer.takeError()).Clone(); memcpy(buf, pybuffer.get().get().buf, pybuffer.get().get().len); num_bytes = pybuffer.get().get().len; return Status(); @@ -1300,7 +1306,8 @@ public: auto bytes_written = As<long long>(m_py_obj.CallMethod("write", pystring.get())); if (!bytes_written) - return Status::FromError(bytes_written.takeError()); + // Cloning since the wrapped exception may still reference the PyThread. + return Status::FromError(bytes_written.takeError()).Clone(); if (bytes_written.get() < 0) return Status::FromErrorString( ".write() method returned a negative number!"); @@ -1321,14 +1328,16 @@ public: auto pystring = As<PythonString>( m_py_obj.CallMethod("read", (unsigned long long)num_chars)); if (!pystring) - return Status::FromError(pystring.takeError()); + // Cloning since the wrapped exception may still reference the PyThread. + return Status::FromError(pystring.takeError()).Clone(); if (pystring.get().IsNone()) { // EOF return Status(); } auto stringref = pystring.get().AsUTF8(); if (!stringref) - return Status::FromError(stringref.takeError()); + // Cloning since the wrapped exception may still reference the PyThread. + return Status::FromError(stringref.takeError()).Clone(); num_bytes = stringref.get().size(); memcpy(buf, stringref.get().begin(), num_bytes); return Status(); diff --git a/lldb/source/Utility/Scalar.cpp b/lldb/source/Utility/Scalar.cpp index 329f5b6e4b9a..f07a9f3bed00 100644 --- a/lldb/source/Utility/Scalar.cpp +++ b/lldb/source/Utility/Scalar.cpp @@ -852,57 +852,50 @@ llvm::APFloat Scalar::CreateAPFloatFromAPFloat(lldb::BasicType basic_type) { } } -bool lldb_private::operator==(Scalar lhs, Scalar rhs) { +APFloat::cmpResult lldb_private::compare(Scalar lhs, Scalar rhs) { // If either entry is void then we can just compare the types if (lhs.m_type == Scalar::e_void || rhs.m_type == Scalar::e_void) - return lhs.m_type == rhs.m_type; + return lhs.m_type == rhs.m_type ? 
APFloat::cmpEqual : APFloat::cmpUnordered; - llvm::APFloat::cmpResult result; switch (Scalar::PromoteToMaxType(lhs, rhs)) { case Scalar::e_void: break; case Scalar::e_int: - return lhs.m_integer == rhs.m_integer; + if (lhs.m_integer < rhs.m_integer) + return APFloat::cmpLessThan; + if (lhs.m_integer > rhs.m_integer) + return APFloat::cmpGreaterThan; + return APFloat::cmpEqual; case Scalar::e_float: - result = lhs.m_float.compare(rhs.m_float); - if (result == llvm::APFloat::cmpEqual) - return true; + return lhs.m_float.compare(rhs.m_float); } - return false; + return APFloat::cmpUnordered; } -bool lldb_private::operator!=(const Scalar &lhs, const Scalar &rhs) { - return !(lhs == rhs); +bool lldb_private::operator==(const Scalar &lhs, const Scalar &rhs) { + return compare(lhs, rhs) == APFloat::cmpEqual; } -bool lldb_private::operator<(Scalar lhs, Scalar rhs) { - if (lhs.m_type == Scalar::e_void || rhs.m_type == Scalar::e_void) - return false; +bool lldb_private::operator!=(const Scalar &lhs, const Scalar &rhs) { + return compare(lhs, rhs) != APFloat::cmpEqual; +} - llvm::APFloat::cmpResult result; - switch (Scalar::PromoteToMaxType(lhs, rhs)) { - case Scalar::e_void: - break; - case Scalar::e_int: - return lhs.m_integer < rhs.m_integer; - case Scalar::e_float: - result = lhs.m_float.compare(rhs.m_float); - if (result == llvm::APFloat::cmpLessThan) - return true; - } - return false; +bool lldb_private::operator<(const Scalar &lhs, const Scalar &rhs) { + return compare(lhs, rhs) == APFloat::cmpLessThan; } bool lldb_private::operator<=(const Scalar &lhs, const Scalar &rhs) { - return !(rhs < lhs); + APFloat::cmpResult Res = compare(lhs, rhs); + return Res == APFloat::cmpLessThan || Res == APFloat::cmpEqual; } bool lldb_private::operator>(const Scalar &lhs, const Scalar &rhs) { - return rhs < lhs; + return compare(lhs, rhs) == APFloat::cmpGreaterThan; } bool lldb_private::operator>=(const Scalar &lhs, const Scalar &rhs) { - return !(lhs < rhs); + APFloat::cmpResult Res = 
compare(lhs, rhs); + return Res == APFloat::cmpGreaterThan || Res == APFloat::cmpEqual; } bool Scalar::ClearBit(uint32_t bit) { diff --git a/lldb/source/Utility/Status.cpp b/lldb/source/Utility/Status.cpp index 4af3af5fba01..a659456b9b1b 100644 --- a/lldb/source/Utility/Status.cpp +++ b/lldb/source/Utility/Status.cpp @@ -8,6 +8,8 @@ #include "lldb/Utility/Status.h" +#include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/Log.h" #include "lldb/Utility/VASPrintf.h" #include "lldb/lldb-defines.h" #include "lldb/lldb-enumerations.h" @@ -37,48 +39,80 @@ class raw_ostream; using namespace lldb; using namespace lldb_private; -Status::Status() {} +char CloneableError::ID; +char CloneableECError::ID; +char MachKernelError::ID; +char Win32Error::ID; +char ExpressionError::ID; + +namespace { +/// A std::error_code category for eErrorTypeGeneric. +class LLDBGenericCategory : public std::error_category { + const char *name() const noexcept override { return "LLDBGenericCategory"; } + std::string message(int __ev) const override { return "generic LLDB error"; }; +}; +LLDBGenericCategory &lldb_generic_category() { + static LLDBGenericCategory g_generic_category; + return g_generic_category; +} + +/// A std::error_code category for eErrorTypeExpression. 
+class ExpressionCategory : public std::error_category { + const char *name() const noexcept override { + return "LLDBExpressionCategory"; + } + std::string message(int __ev) const override { + return ExpressionResultAsCString( + static_cast<lldb::ExpressionResults>(__ev)); + }; +}; +ExpressionCategory &expression_category() { + static ExpressionCategory g_expression_category; + return g_expression_category; +} +} // namespace + +Status::Status() : m_error(llvm::Error::success()) {} + +static llvm::Error ErrorFromEnums(Status::ValueType err, ErrorType type, + std::string msg) { + switch (type) { + case eErrorTypeMachKernel: + return llvm::make_error<MachKernelError>( + std::error_code(err, std::system_category())); + case eErrorTypeWin32: + return llvm::make_error<Win32Error>( + std::error_code(err, std::system_category())); + case eErrorTypePOSIX: + if (msg.empty()) + return llvm::errorCodeToError( + std::error_code(err, std::generic_category())); + return llvm::createStringError( + std::move(msg), std::error_code(err, std::generic_category())); + default: + return llvm::createStringError( + std::move(msg), std::error_code(err, lldb_generic_category())); + } +} Status::Status(ValueType err, ErrorType type, std::string msg) - : m_code(err), m_type(type), m_string(std::move(msg)) {} + : m_error(ErrorFromEnums(err, type, msg)) {} -// This logic is confusing because c++ calls the traditional (posix) errno codes +// This logic is confusing because C++ calls the traditional (posix) errno codes // "generic errors", while we use the term "generic" to mean completely // arbitrary (text-based) errors. Status::Status(std::error_code EC) - : m_code(EC.value()), - m_type(EC.category() == std::generic_category() ? eErrorTypePOSIX - : eErrorTypeGeneric), - m_string(EC.message()) {} + : m_error(!EC ? 
llvm::Error::success() : llvm::errorCodeToError(EC)) {} Status::Status(std::string err_str) - : m_code(LLDB_GENERIC_ERROR), m_type(eErrorTypeGeneric), - m_string(std::move(err_str)) {} + : m_error( + llvm::createStringError(llvm::inconvertibleErrorCode(), err_str)) {} -Status::Status(llvm::Error error) { - if (!error) { - Clear(); - return; - } - - // if the error happens to be a errno error, preserve the error code - error = llvm::handleErrors( - std::move(error), [&](std::unique_ptr<llvm::ECError> e) -> llvm::Error { - std::error_code ec = e->convertToErrorCode(); - if (ec.category() == std::generic_category()) { - m_code = ec.value(); - m_type = ErrorType::eErrorTypePOSIX; - return llvm::Error::success(); - } - return llvm::Error(std::move(e)); - }); - - // Otherwise, just preserve the message - if (error) { - m_code = LLDB_GENERIC_ERROR; - m_type = eErrorTypeGeneric; - m_string = llvm::toString(std::move(error)); - } +const Status &Status::operator=(Status &&other) { + Clear(); + llvm::consumeError(std::move(m_error)); + m_error = std::move(other.m_error); + return *this; } Status Status::FromErrorStringWithFormat(const char *format, ...) { @@ -94,26 +128,40 @@ Status Status::FromErrorStringWithFormat(const char *format, ...) { return Status(string); } -Status Status::FromError(llvm::Error error) { return Status(std::move(error)); } - -llvm::Error Status::ToError() const { - if (Success()) - return llvm::Error::success(); - if (m_type == ErrorType::eErrorTypePOSIX) - return llvm::errorCodeToError( - std::error_code(m_code, std::generic_category())); - return llvm::createStringError(AsCString()); +Status Status::FromExpressionError(lldb::ExpressionResults result, + std::string msg) { + return Status(llvm::make_error<ExpressionError>( + std::error_code(result, expression_category()), msg)); } -Status::~Status() = default; +/// Creates a deep copy of all known errors and converts all other +/// errors to a new llvm::StringError. 
+static llvm::Error CloneError(const llvm::Error &error) { + llvm::Error result = llvm::Error::success(); + auto clone = [](const llvm::ErrorInfoBase &e) { + if (e.isA<CloneableError>()) + return llvm::Error(static_cast<const CloneableError &>(e).Clone()); + return llvm::make_error<llvm::StringError>(e.message(), + e.convertToErrorCode(), true); + }; + llvm::visitErrors(error, [&](const llvm::ErrorInfoBase &e) { + result = joinErrors(std::move(result), clone(e)); + }); + return result; +} -const Status &Status::operator=(Status &&other) { - m_code = other.m_code; - m_type = other.m_type; - m_string = std::move(other.m_string); - return *this; +Status Status::FromError(llvm::Error error) { + if (error.isA<llvm::ECError>()) { + std::error_code ec = llvm::errorToErrorCode(std::move(error)); + return Status::FromError(llvm::make_error<CloneableECError>(ec)); + } + return Status(std::move(error)); } +llvm::Error Status::ToError() const { return CloneError(m_error); } + +Status::~Status() { llvm::consumeError(std::move(m_error)); } + #ifdef _WIN32 static std::string RetrieveWin32ErrorString(uint32_t error_code) { char *buffer = nullptr; @@ -140,6 +188,37 @@ static std::string RetrieveWin32ErrorString(uint32_t error_code) { } #endif +std::string MachKernelError::message() const { +#if defined(__APPLE__) + if (const char *s = ::mach_error_string(convertToErrorCode().value())) + return s; +#endif + return "MachKernelError"; +} + +std::string Win32Error::message() const { +#if defined(_WIN32) + return RetrieveWin32ErrorString(convertToErrorCode().value()); +#endif + return "Win32Error"; +} + +std::unique_ptr<CloneableError> CloneableECError::Clone() const { + return std::make_unique<CloneableECError>(convertToErrorCode()); +} + +std::unique_ptr<CloneableError> MachKernelError::Clone() const { + return std::make_unique<MachKernelError>(convertToErrorCode()); +} + +std::unique_ptr<CloneableError> Win32Error::Clone() const { + return 
std::make_unique<Win32Error>(convertToErrorCode()); +} + +std::unique_ptr<CloneableError> ExpressionError::Clone() const { + return std::make_unique<ExpressionError>(convertToErrorCode(), message()); +} + // Get the error value as a NULL C string. The error string will be fetched and // cached on demand. The cached error string value will remain until the error // value is changed or cleared. @@ -147,29 +226,12 @@ const char *Status::AsCString(const char *default_error_str) const { if (Success()) return nullptr; - if (m_string.empty()) { - switch (m_type) { - case eErrorTypeMachKernel: -#if defined(__APPLE__) - if (const char *s = ::mach_error_string(m_code)) - m_string.assign(s); -#endif - break; - - case eErrorTypePOSIX: - m_string = llvm::sys::StrError(m_code); - break; - - case eErrorTypeWin32: -#if defined(_WIN32) - m_string = RetrieveWin32ErrorString(m_code); -#endif - break; + m_string = llvm::toStringWithoutConsuming(m_error); + // Backwards compatibility with older implementations of Status. + if (m_error.isA<llvm::ECError>()) + if (!m_string.empty() && m_string[m_string.size() - 1] == '\n') + m_string.pop_back(); - default: - break; - } - } if (m_string.empty()) { if (default_error_str) m_string.assign(default_error_str); @@ -181,29 +243,64 @@ const char *Status::AsCString(const char *default_error_str) const { // Clear the error and any cached error string that it might contain. void Status::Clear() { - m_code = 0; - m_type = eErrorTypeInvalid; - m_string.clear(); + if (m_error) + LLDB_LOG_ERRORV(GetLog(LLDBLog::API), std::move(m_error), + "dropping error {0}"); + m_error = llvm::Error::success(); } -// Access the error value. -Status::ValueType Status::GetError() const { return m_code; } +Status::ValueType Status::GetError() const { + Status::ValueType result = 0; + llvm::visitErrors(m_error, [&](const llvm::ErrorInfoBase &error) { + // Return the first only. 
+ if (result) + return; + std::error_code ec = error.convertToErrorCode(); + result = ec.value(); + }); + return result; +} // Access the error type. -ErrorType Status::GetType() const { return m_type; } +ErrorType Status::GetType() const { + ErrorType result = eErrorTypeInvalid; + llvm::visitErrors(m_error, [&](const llvm::ErrorInfoBase &error) { + // Return the first only. + if (result != eErrorTypeInvalid) + return; + if (error.isA<MachKernelError>()) + result = eErrorTypeMachKernel; + else if (error.isA<Win32Error>()) + result = eErrorTypeWin32; + else if (error.isA<ExpressionError>()) + result = eErrorTypeExpression; + else if (error.convertToErrorCode().category() == std::generic_category()) + result = eErrorTypePOSIX; + else if (error.convertToErrorCode().category() == lldb_generic_category() || + error.convertToErrorCode() == llvm::inconvertibleErrorCode()) + result = eErrorTypeGeneric; + else + result = eErrorTypeInvalid; + }); + return result; +} -// Returns true if this object contains a value that describes an error or -// otherwise non-success result. -bool Status::Fail() const { return m_code != 0; } +bool Status::Fail() const { + // Note that this does not clear the checked flag in + // m_error. Otherwise we'd need to make this thread-safe. + return m_error.isA<llvm::ErrorInfoBase>(); +} Status Status::FromErrno() { - // Update the error value to be "errno" and update the type to be "POSIX". - return Status(errno, eErrorTypePOSIX); + std::error_code ec = llvm::errnoAsErrorCode(); + if (ec) + return Status::FromError(llvm::make_error<CloneableECError>(ec)); + return Status(); } // Returns true if the error code in this object is considered a successful // return value. 
-bool Status::Success() const { return m_code == 0; } +bool Status::Success() const { return !Fail(); } void llvm::format_provider<lldb_private::Status>::format( const lldb_private::Status &error, llvm::raw_ostream &OS, diff --git a/lldb/test/API/lang/cpp/fpnan/Makefile b/lldb/test/API/lang/cpp/fpnan/Makefile new file mode 100644 index 000000000000..99998b20bcb0 --- /dev/null +++ b/lldb/test/API/lang/cpp/fpnan/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/lang/cpp/fpnan/TestFPNaN.py b/lldb/test/API/lang/cpp/fpnan/TestFPNaN.py new file mode 100644 index 000000000000..6093ef91ac1f --- /dev/null +++ b/lldb/test/API/lang/cpp/fpnan/TestFPNaN.py @@ -0,0 +1,130 @@ +""" +Test floating point expressions with zero, NaN, dernormalized and infinite +numbers. +""" + +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class FPNaNTestCase(TestBase): + def setUp(self): + # Call super's setUp(). + TestBase.setUp(self) + # Find the line number to break inside main(). + self.line = line_number("main.cpp", "// Set break point at this line.") + + def test(self): + self.build() + exe = self.getBuildArtifact("a.out") + self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) + + # Break inside the main. 
+ lldbutil.run_break_set_by_file_and_line( + self, "main.cpp", self.line, num_expected_locations=1 + ) + + self.runCmd("run", RUN_SUCCEEDED) + # Zero and denorm + self.expect( + "expr +0.0", + VARIABLES_DISPLAYED_CORRECTLY, + substrs=["double", "0"], + ) + self.expect( + "expr -0.0", + VARIABLES_DISPLAYED_CORRECTLY, + substrs=["double", "0"], + ) + self.expect( + "expr 0.0 / 0", + VARIABLES_DISPLAYED_CORRECTLY, + substrs=["double", "NaN"], + ) + self.expect( + "expr 0 / 0.0", + VARIABLES_DISPLAYED_CORRECTLY, + substrs=["double", "NaN"], + ) + self.expect( + "expr 1 / +0.0", + VARIABLES_DISPLAYED_CORRECTLY, + substrs=["double", "+Inf"], + ) + self.expect( + "expr 1 / -0.0", + VARIABLES_DISPLAYED_CORRECTLY, + substrs=["double", "-Inf"], + ) + self.expect( + "expr +0.0 / +0.0 != +0.0 / +0.0", + VARIABLES_DISPLAYED_CORRECTLY, + substrs=["bool", "true"], + ) + self.expect( + "expr -1.f * 0", + VARIABLES_DISPLAYED_CORRECTLY, + substrs=["float", "-0"], + ) + self.expect( + "expr 0x0.123p-1", + VARIABLES_DISPLAYED_CORRECTLY, + substrs=["double", "0.0355224609375"], + ) + # NaN + self.expect( + "expr fnan < fnan", + VARIABLES_DISPLAYED_CORRECTLY, + substrs=["bool", "false"], + ) + self.expect( + "expr fnan <= fnan", + VARIABLES_DISPLAYED_CORRECTLY, + substrs=["bool", "false"], + ) + self.expect( + "expr fnan > fnan", + VARIABLES_DISPLAYED_CORRECTLY, + substrs=["bool", "false"], + ) + self.expect( + "expr fnan >= fnan", + VARIABLES_DISPLAYED_CORRECTLY, + substrs=["bool", "false"], + ) + self.expect( + "expr fnan == fnan", + VARIABLES_DISPLAYED_CORRECTLY, + substrs=["bool", "false"], + ) + self.expect( + "expr fnan != fnan", + VARIABLES_DISPLAYED_CORRECTLY, + substrs=["bool", "true"], + ) + self.expect( + "expr 1.0 <= fnan", + VARIABLES_DISPLAYED_CORRECTLY, + substrs=["bool", "false"], + ) + self.expect( + "expr 1.0f < fnan", + VARIABLES_DISPLAYED_CORRECTLY, + substrs=["bool", "false"], + ) + self.expect( + "expr 1.0f != fnan", + VARIABLES_DISPLAYED_CORRECTLY, + 
substrs=["bool", "true"], + ) + self.expect( + "expr (unsigned int) fdenorm", + VARIABLES_DISPLAYED_CORRECTLY, + substrs=["int", "0"], + ) + self.expect( + "expr (unsigned int) (1.0f + fdenorm)", + VARIABLES_DISPLAYED_CORRECTLY, + substrs=["int", "1"], + ) diff --git a/lldb/test/API/lang/cpp/fpnan/main.cpp b/lldb/test/API/lang/cpp/fpnan/main.cpp new file mode 100644 index 000000000000..8bcfebfaea8e --- /dev/null +++ b/lldb/test/API/lang/cpp/fpnan/main.cpp @@ -0,0 +1,8 @@ +#include <limits> + +int main() { + float fnan = std::numeric_limits<float>::quiet_NaN(); + float fdenorm = std::numeric_limits<float>::denorm_min(); + + // Set break point at this line. +} diff --git a/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py b/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py index 3d8aaeda7f4b..1082541aebcf 100644 --- a/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py +++ b/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py @@ -74,10 +74,6 @@ class TestDAP_memory(lldbdap_testcase.DAPTestCaseBase): ].keys(), ) - # lldb-dap assumes that all reads will be within the same region. On Windows - # the target string is at the very start of a region so the -1 offset causes - # the read to only read from the previous region and only return 1 byte. - @skipIfWindows def test_readMemory(self): """ Tests the 'readMemory' request @@ -104,10 +100,6 @@ class TestDAP_memory(lldbdap_testcase.DAPTestCaseBase): mem = self.dap_server.request_readMemory(memref, 2, 3)["body"] self.assertEqual(b64decode(mem["data"]), b"ad\0") - # Use a negative offset - mem = self.dap_server.request_readMemory(memref, -1, 6)["body"] - self.assertEqual(b64decode(mem["data"])[1:], b"dead\0") - # Reads of size 0 are successful # VS-Code sends those in order to check if a `memoryReference` can actually be dereferenced. 
mem = self.dap_server.request_readMemory(memref, 0, 0) diff --git a/lldb/tools/lldb-dap/package-lock.json b/lldb/tools/lldb-dap/package-lock.json index 96570e42dbfd..866365971559 100644 --- a/lldb/tools/lldb-dap/package-lock.json +++ b/lldb/tools/lldb-dap/package-lock.json @@ -1,12 +1,12 @@ { "name": "lldb-dap", - "version": "0.2.4", + "version": "0.2.6", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "lldb-dap", - "version": "0.2.4", + "version": "0.2.6", "license": "Apache 2.0 License with LLVM exceptions", "devDependencies": { "@types/node": "^18.11.18", diff --git a/lldb/tools/lldb-dap/package.json b/lldb/tools/lldb-dap/package.json index d35accfb6ec4..33b09d56ab17 100644 --- a/lldb/tools/lldb-dap/package.json +++ b/lldb/tools/lldb-dap/package.json @@ -1,7 +1,7 @@ { "name": "lldb-dap", "displayName": "LLDB DAP", - "version": "0.2.5", + "version": "0.2.6", "publisher": "llvm-vs-code-extensions", "homepage": "https://lldb.llvm.org", "description": "LLDB debugging from VSCode", @@ -78,6 +78,15 @@ "scope": "resource", "type": "string", "description": "The log path for lldb-dap (if any)" + }, + "lldb-dap.environment": { + "scope": "resource", + "type": "object", + "default": {}, + "description": "The environment of the lldb-dap process.", + "additionalProperties": { + "type": "string" + } } } }, diff --git a/lldb/tools/lldb-dap/src-ts/extension.ts b/lldb/tools/lldb-dap/src-ts/extension.ts index fdc4f47b238b..36d3dfba18c1 100644 --- a/lldb/tools/lldb-dap/src-ts/extension.ts +++ b/lldb/tools/lldb-dap/src-ts/extension.ts @@ -25,9 +25,15 @@ function createDefaultLLDBDapOptions(): LLDBDapOptions { if (log_path) { env["LLDBDAP_LOG"] = log_path; } - + const configEnvironment = config.get<{ [key: string]: string }>("environment") || {}; if (path) { - return new vscode.DebugAdapterExecutable(path, [], { env }); + const dbgOptions = { + env: { + ...configEnvironment, + ...env, + } + }; + return new vscode.DebugAdapterExecutable(path, [], dbgOptions); } 
else if (packageJSONExecutable) { return new vscode.DebugAdapterExecutable( packageJSONExecutable.command, @@ -36,6 +42,7 @@ function createDefaultLLDBDapOptions(): LLDBDapOptions { ...packageJSONExecutable.options, env: { ...packageJSONExecutable.options?.env, + ...configEnvironment, ...env, }, }, diff --git a/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationServerTest.cpp b/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationServerTest.cpp index 69ca1720c04f..ba9ca6ea73e3 100644 --- a/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationServerTest.cpp +++ b/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationServerTest.cpp @@ -12,6 +12,7 @@ #include "Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.h" #include "lldb/Utility/Connection.h" #include "lldb/Utility/UnimplementedError.h" +#include "lldb/lldb-enumerations.h" namespace lldb_private { namespace process_gdb_remote { @@ -25,7 +26,7 @@ TEST(GDBRemoteCommunicationServerTest, SendErrorResponse_ErrorNumber) { TEST(GDBRemoteCommunicationServerTest, SendErrorResponse_Status) { MockServerWithMockConnection server; - Status status(0x42, lldb::eErrorTypeGeneric, "Test error message"); + Status status(0x42, lldb::eErrorTypePOSIX, "Test error message"); server.SendErrorResponse(status); EXPECT_THAT( diff --git a/lldb/unittests/TestingSupport/Host/SocketTestUtilities.cpp b/lldb/unittests/TestingSupport/Host/SocketTestUtilities.cpp index 2455a4f6f5d4..9777555d57ff 100644 --- a/lldb/unittests/TestingSupport/Host/SocketTestUtilities.cpp +++ b/lldb/unittests/TestingSupport/Host/SocketTestUtilities.cpp @@ -102,12 +102,23 @@ static bool CheckIPSupport(llvm::StringRef Proto, llvm::StringRef Addr) { Proto, Err) .str(); bool HasProtocolError = false; - handleAllErrors(std::move(Err), [&](std::unique_ptr<llvm::ECError> ECErr) { - std::error_code ec = ECErr->convertToErrorCode(); - if (ec == std::make_error_code(std::errc::address_family_not_supported) || - ec == 
std::make_error_code(std::errc::address_not_available)) - HasProtocolError = true; - }); + handleAllErrors( + std::move(Err), + [&](std::unique_ptr<CloneableECError> ECErr) { + std::error_code ec = ECErr->convertToErrorCode(); + if (ec == + std::make_error_code(std::errc::address_family_not_supported) || + ec == std::make_error_code(std::errc::address_not_available)) + HasProtocolError = true; + }, + [&](std::unique_ptr<llvm::ECError> ECErr) { + // FIXME: This code path should not be reachable. + std::error_code ec = ECErr->convertToErrorCode(); + if (ec == + std::make_error_code(std::errc::address_family_not_supported) || + ec == std::make_error_code(std::errc::address_not_available)) + HasProtocolError = true; + }); if (HasProtocolError) { GTEST_LOG_(WARNING) << llvm::formatv( diff --git a/lldb/unittests/Utility/StatusTest.cpp b/lldb/unittests/Utility/StatusTest.cpp index be4f2beebcdb..e37c94ac17f2 100644 --- a/lldb/unittests/Utility/StatusTest.cpp +++ b/lldb/unittests/Utility/StatusTest.cpp @@ -70,6 +70,14 @@ TEST(StatusTest, ErrorConversion) { llvm::Error foo = Status::FromErrorString("foo").ToError(); EXPECT_TRUE(bool(foo)); EXPECT_EQ("foo", llvm::toString(std::move(foo))); + + llvm::Error eperm = llvm::errorCodeToError({EPERM, std::generic_category()}); + llvm::Error eintr = llvm::errorCodeToError({EINTR, std::generic_category()}); + llvm::Error elist = llvm::joinErrors(std::move(eperm), std::move(eintr)); + elist = llvm::joinErrors(std::move(elist), llvm::createStringError("foo")); + Status list = Status::FromError(std::move(elist)); + EXPECT_EQ((int)list.GetError(), EPERM); + EXPECT_EQ(list.GetType(), eErrorTypePOSIX); } #ifdef _WIN32 diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake index 3707ca824f6e..86f2bac7d23e 100644 --- a/llvm/cmake/config-ix.cmake +++ b/llvm/cmake/config-ix.cmake @@ -51,11 +51,9 @@ endif() check_include_file(signal.h HAVE_SIGNAL_H) check_include_file(sys/ioctl.h HAVE_SYS_IOCTL_H) check_include_file(sys/mman.h 
HAVE_SYS_MMAN_H) -check_include_file(sys/param.h HAVE_SYS_PARAM_H) check_include_file(sys/resource.h HAVE_SYS_RESOURCE_H) check_include_file(sys/stat.h HAVE_SYS_STAT_H) check_include_file(sys/time.h HAVE_SYS_TIME_H) -check_include_file(sys/types.h HAVE_SYS_TYPES_H) check_include_file(sysexits.h HAVE_SYSEXITS_H) check_include_file(termios.h HAVE_TERMIOS_H) check_include_file(unistd.h HAVE_UNISTD_H) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 144b4497ca63..abeafb761620 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -354,7 +354,7 @@ added in the future: not be used lightly but only for specific situations such as an alternative to the *register pinning* performance technique often used when implementing functional programming languages. At the - moment only X86, AArch64, and RISCV support this convention. The + moment only X86, AArch64, and RISCV support this convention. The following limitations exist: - On *X86-32* only up to 4 bit type parameters are supported. No @@ -685,10 +685,10 @@ implementation defined, the optimizer can't do the latter. The former is challenging as many commonly expected properties, such as ``ptrtoint(v)-ptrtoint(v) == 0``, don't hold for non-integral types. Similar restrictions apply to intrinsics that might examine the pointer bits, -such as :ref:`llvm.ptrmask<int_ptrmask>`. +such as :ref:`llvm.ptrmask<int_ptrmask>`. The alignment information provided by the frontend for a non-integral pointer -(typically using attributes or metadata) must be valid for every possible +(typically using attributes or metadata) must be valid for every possible representation of the pointer. .. _globalvars: @@ -1677,10 +1677,10 @@ Currently, only the following parameter attributes are defined: - The range is allowed to wrap. - The empty range is represented using ``0,0``. - Otherwise, ``a`` and ``b`` are not allowed to be equal. 
- - This attribute may only be applied to parameters or return values with integer + + This attribute may only be applied to parameters or return values with integer or vector of integer types. - + For vector-typed parameters, the range is applied element-wise. .. _gc: @@ -14346,7 +14346,7 @@ Arguments: """""""""" The first 4 arguments are similar to ``llvm.instrprof.increment``. The indexing is specific to callsites, meaning callsites are indexed from 0, independent from -the indexes used by the other intrinsics (such as +the indexes used by the other intrinsics (such as ``llvm.instrprof.increment[.step]``). The last argument is the called value of the callsite this intrinsic precedes. @@ -14360,7 +14360,7 @@ a buffer LLVM can use to perform counter increments (i.e. the lowering of ``llvm.instrprof.increment[.step]``. The address range following the counter buffer, ``<num-counters>`` x ``sizeof(ptr)`` - sized, is expected to contain pointers to contexts of functions called from this function ("subcontexts"). -LLVM does not dereference into that memory region, just calculates GEPs. +LLVM does not dereference into that memory region, just calculates GEPs. The lowering of ``llvm.instrprof.callsite`` consists of: @@ -14929,8 +14929,8 @@ integer bit width or any vector of integer elements. Overview: """"""""" -Return ``-1`` if ``%a`` is signed less than ``%b``, ``0`` if they are equal, and -``1`` if ``%a`` is signed greater than ``%b``. Vector intrinsics operate on a per-element basis. +Return ``-1`` if ``%a`` is signed less than ``%b``, ``0`` if they are equal, and +``1`` if ``%a`` is signed greater than ``%b``. Vector intrinsics operate on a per-element basis. Arguments: """""""""" @@ -14958,8 +14958,8 @@ integer bit width or any vector of integer elements. Overview: """"""""" -Return ``-1`` if ``%a`` is unsigned less than ``%b``, ``0`` if they are equal, and -``1`` if ``%a`` is unsigned greater than ``%b``. Vector intrinsics operate on a per-element basis. 
+Return ``-1`` if ``%a`` is unsigned less than ``%b``, ``0`` if they are equal, and +``1`` if ``%a`` is unsigned greater than ``%b``. Vector intrinsics operate on a per-element basis. Arguments: """""""""" @@ -21556,9 +21556,9 @@ Semantics: """""""""" The '``llvm.vp.minimum``' intrinsic performs floating-point minimum (:ref:`minimum <i_minimum>`) -of the first and second vector arguments on each enabled lane, the result being +of the first and second vector arguments on each enabled lane, the result being NaN if either argument is a NaN. -0.0 is considered to be less than +0.0 for this -intrinsic. The result on disabled lanes is a :ref:`poison value <poisonvalues>`. +intrinsic. The result on disabled lanes is a :ref:`poison value <poisonvalues>`. The operation is performed in the default floating-point environment. Examples: @@ -29191,7 +29191,7 @@ Semantics: """""""""" The intrinsic ``@llvm.allow.ubsan.check()`` returns either ``true`` or -``false``, depending on compiler options. +``false``, depending on compiler options. For each evaluation of a call to this intrinsic, the program must be valid and correct both if it returns ``true`` and if it returns ``false``. @@ -29250,13 +29250,13 @@ Semantics: """""""""" The intrinsic ``@llvm.allow.runtime.check()`` returns either ``true`` or -``false``, depending on compiler options. +``false``, depending on compiler options. For each evaluation of a call to this intrinsic, the program must be valid and correct both if it returns ``true`` and if it returns ``false``. When used in a branch condition, it allows us to choose between -two alternative correct solutions for the same problem. +two alternative correct solutions for the same problem. If the intrinsic is evaluated as ``true``, program should execute a guarded check. 
If the intrinsic is evaluated as ``false``, the program should avoid any diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def index 5914324b286c..ebc917ea53eb 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def @@ -2183,6 +2183,21 @@ TLI_DEFINE_ENUM_INTERNAL(sinl) TLI_DEFINE_STRING_INTERNAL("sinl") TLI_DEFINE_SIG_INTERNAL(LDbl, LDbl) +/// void sincos(double x, double *sin_out, double *cos_out); +TLI_DEFINE_ENUM_INTERNAL(sincos) +TLI_DEFINE_STRING_INTERNAL("sincos") +TLI_DEFINE_SIG_INTERNAL(Void, Dbl, Ptr, Ptr) + +/// void sincosf(float x, float *sin_out, float *cos_out); +TLI_DEFINE_ENUM_INTERNAL(sincosf) +TLI_DEFINE_STRING_INTERNAL("sincosf") +TLI_DEFINE_SIG_INTERNAL(Void, Flt, Ptr, Ptr) + +/// void sincosl(long double x, long double *sin_out, long double *cos_out); +TLI_DEFINE_ENUM_INTERNAL(sincosl) +TLI_DEFINE_STRING_INTERNAL("sincosl") +TLI_DEFINE_SIG_INTERNAL(Void, LDbl, Ptr, Ptr) + /// int siprintf(char *str, const char *format, ...); TLI_DEFINE_ENUM_INTERNAL(siprintf) TLI_DEFINE_STRING_INTERNAL("siprintf") diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 7ee8ca18c2c1..d6c2c36a0d48 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -772,7 +772,7 @@ public: SDValue getMCSymbol(MCSymbol *Sym, EVT VT); SDValue getValueType(EVT); - SDValue getRegister(unsigned Reg, EVT VT); + SDValue getRegister(Register Reg, EVT VT); SDValue getRegisterMask(const uint32_t *RegMask); SDValue getEHLabel(const SDLoc &dl, SDValue Root, MCSymbol *Label); SDValue getLabelNode(unsigned Opcode, const SDLoc &dl, SDValue Root, @@ -784,7 +784,7 @@ public: return getBlockAddress(BA, VT, Offset, true, TargetFlags); } - SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, + SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N) 
{ return getNode(ISD::CopyToReg, dl, MVT::Other, Chain, getRegister(Reg, N.getValueType()), N); @@ -793,7 +793,7 @@ public: // This version of the getCopyToReg method takes an extra operand, which // indicates that there is potentially an incoming glue value (if Glue is not // null) and that there should be a glue result. - SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N, + SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N, SDValue Glue) { SDVTList VTs = getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, getRegister(Reg, N.getValueType()), N, Glue }; @@ -810,7 +810,7 @@ public: ArrayRef(Ops, Glue.getNode() ? 4 : 3)); } - SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT) { + SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT) { SDVTList VTs = getVTList(VT, MVT::Other); SDValue Ops[] = { Chain, getRegister(Reg, VT) }; return getNode(ISD::CopyFromReg, dl, VTs, Ops); @@ -819,7 +819,7 @@ public: // This version of the getCopyFromReg method takes an extra operand, which // indicates that there is potentially an incoming glue value (if Glue is not // null) and that there should be a glue result. - SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT, + SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT, SDValue Glue) { SDVTList VTs = getVTList(VT, MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, getRegister(Reg, VT), Glue }; diff --git a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h index 9282c4a771af..9f1d6f7b4f95 100644 --- a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h +++ b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h @@ -170,7 +170,7 @@ struct CaseBlock { struct JumpTable { /// The virtual register containing the index of the jump table entry /// to jump to. - unsigned Reg; + Register Reg; /// The JumpTableIndex for this jump table in the function. 
unsigned JTI; /// The MBB into which to emit the code for the indirect jump. @@ -182,7 +182,7 @@ struct JumpTable { /// The debug location of the instruction this JumpTable was produced from. std::optional<SDLoc> SL; // For SelectionDAG - JumpTable(unsigned R, unsigned J, MachineBasicBlock *M, MachineBasicBlock *D, + JumpTable(Register R, unsigned J, MachineBasicBlock *M, MachineBasicBlock *D, std::optional<SDLoc> SL) : Reg(R), JTI(J), MBB(M), Default(D), SL(SL) {} }; @@ -218,7 +218,7 @@ struct BitTestBlock { APInt First; APInt Range; const Value *SValue; - unsigned Reg; + Register Reg; MVT RegVT; bool Emitted; bool ContiguousRange; @@ -229,7 +229,7 @@ struct BitTestBlock { BranchProbability DefaultProb; bool FallthroughUnreachable = false; - BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, bool E, + BitTestBlock(APInt F, APInt R, const Value *SV, Register Rg, MVT RgVT, bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D, BitTestInfo C, BranchProbability Pr) : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg), diff --git a/llvm/include/llvm/CodeGen/VirtRegMap.h b/llvm/include/llvm/CodeGen/VirtRegMap.h index dee462255b0b..52221762fed5 100644 --- a/llvm/include/llvm/CodeGen/VirtRegMap.h +++ b/llvm/include/llvm/CodeGen/VirtRegMap.h @@ -31,12 +31,6 @@ class raw_ostream; class TargetInstrInfo; class VirtRegMap : public MachineFunctionPass { - public: - enum { - NO_STACK_SLOT = (1L << 30)-1, - }; - - private: MachineRegisterInfo *MRI = nullptr; const TargetInstrInfo *TII = nullptr; const TargetRegisterInfo *TRI = nullptr; @@ -69,6 +63,8 @@ class TargetInstrInfo; public: static char ID; + static constexpr int NO_STACK_SLOT = INT_MAX; + VirtRegMap() : MachineFunctionPass(ID), Virt2StackSlotMap(NO_STACK_SLOT) {} VirtRegMap(const VirtRegMap &) = delete; VirtRegMap &operator=(const VirtRegMap &) = delete; diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake index d71ff40144c0..4c9404d95daf 
100644 --- a/llvm/include/llvm/Config/config.h.cmake +++ b/llvm/include/llvm/Config/config.h.cmake @@ -191,9 +191,6 @@ /* Define to 1 if you have the <sys/mman.h> header file. */ #cmakedefine HAVE_SYS_MMAN_H ${HAVE_SYS_MMAN_H} -/* Define to 1 if you have the <sys/param.h> header file. */ -#cmakedefine HAVE_SYS_PARAM_H ${HAVE_SYS_PARAM_H} - /* Define to 1 if you have the <sys/resource.h> header file. */ #cmakedefine HAVE_SYS_RESOURCE_H ${HAVE_SYS_RESOURCE_H} @@ -209,9 +206,6 @@ /* Define to 1 if stat struct has st_mtim member. */ #cmakedefine HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC ${HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC} -/* Define to 1 if you have the <sys/types.h> header file. */ -#cmakedefine HAVE_SYS_TYPES_H ${HAVE_SYS_TYPES_H} - /* Define to 1 if you have the <termios.h> header file. */ #cmakedefine HAVE_TERMIOS_H ${HAVE_TERMIOS_H} diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h index 30ca43d2bde0..09b9d947464a 100644 --- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h +++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h @@ -104,6 +104,8 @@ enum class PrimitiveKind { Double, Ldouble, Nullptr, + Auto, + DecltypeAuto, }; enum class CharKind { diff --git a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h index 49ce417e6fbb..54ae436d90b2 100644 --- a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h @@ -426,7 +426,7 @@ public: virtual ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) = 0; - /// ParseInstruction - Parse one assembly instruction. + /// Parse one assembly instruction. /// /// The parser is positioned following the instruction name. The target /// specific instruction parser should parse the entire instruction and @@ -439,11 +439,11 @@ public: /// \param Operands [out] - The list of parsed operands, this returns /// ownership of them to the caller. 
/// \return True on failure. - virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + virtual bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) = 0; - virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + virtual bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, AsmToken Token, OperandVector &Operands) { - return ParseInstruction(Info, Name, Token.getLoc(), Operands); + return parseInstruction(Info, Name, Token.getLoc(), Operands); } /// ParseDirective - Parse a target specific assembler directive @@ -471,19 +471,19 @@ public: /// \param DirectiveID - The token identifying the directive. virtual ParseStatus parseDirective(AsmToken DirectiveID); - /// MatchAndEmitInstruction - Recognize a series of operands of a parsed + /// Recognize a series of operands of a parsed /// instruction as an actual MCInst and emit it to the specified MCStreamer. /// This returns false on success and returns true on failure to match. /// /// On failure, the target parser is responsible for emitting a diagnostic /// explaining the match failure. - virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + virtual bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) = 0; /// Allows targets to let registers opt out of clobber lists. - virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; } + virtual bool omitRegisterFromClobberLists(unsigned RegNo) { return false; } /// Allow a target to add special case operand matching for things that /// tblgen doesn't/can't handle effectively. 
For example, literal diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index 22b46bd8d7da..c01516aa9d31 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -130,6 +130,7 @@ class GlobalValue; class GlobalObject; class GlobalIFunc; class GlobalVariable; +class GlobalAlias; class Context; class Function; class Instruction; @@ -336,6 +337,7 @@ protected: friend class GlobalObject; // For `Val`. friend class GlobalIFunc; // For `Val`. friend class GlobalVariable; // For `Val`. + friend class GlobalAlias; // For `Val`. /// All values point to the context. Context &Ctx; @@ -1528,6 +1530,38 @@ public: #endif }; +class GlobalAlias final + : public GlobalWithNodeAPI<GlobalAlias, llvm::GlobalAlias, GlobalValue, + llvm::GlobalValue> { + GlobalAlias(llvm::GlobalAlias *C, Context &Ctx) + : GlobalWithNodeAPI(ClassID::GlobalAlias, C, Ctx) {} + friend class Context; // For constructor. + +public: + /// For isa/dyn_cast. + static bool classof(const sandboxir::Value *From) { + return From->getSubclassID() == ClassID::GlobalAlias; + } + + // TODO: Missing create() due to unimplemented sandboxir::Module. + + // TODO: Missing copyAttributresFrom(). + // TODO: Missing removeFromParent(), eraseFromParent(). + + void setAliasee(Constant *Aliasee); + Constant *getAliasee() const; + + const GlobalObject *getAliaseeObject() const; + GlobalObject *getAliaseeObject() { + return const_cast<GlobalObject *>( + static_cast<const GlobalAlias *>(this)->getAliaseeObject()); + } + + static bool isValidLinkage(LinkageTypes L) { + return llvm::GlobalAlias::isValidLinkage(L); + } +}; + class BlockAddress final : public Constant { BlockAddress(llvm::BlockAddress *C, Context &Ctx) : Constant(ClassID::BlockAddress, C, Ctx) {} @@ -1659,6 +1693,8 @@ public: /// \Returns the SBInstruction that corresponds to this iterator, or null if /// the instruction is not found in the IR-to-SandboxIR tables. 
pointer get() const { return getInstr(It); } + /// \Returns the parent BB. + BasicBlock *getNodeParent() const; }; /// Contains a list of sandboxir::Instruction's. diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h index 1a722e65bed8..eff8957d1b68 100644 --- a/llvm/include/llvm/Support/GlobPattern.h +++ b/llvm/include/llvm/Support/GlobPattern.h @@ -34,8 +34,8 @@ namespace llvm { /// expansions are not supported. If \p MaxSubPatterns is empty then /// brace expansions are not supported and characters \p "{,}" are treated as /// literals. -/// * \p "\" escapes the next character so it is treated as a literal. -/// +/// * \p "\\" (a single backslash) escapes the next character so it is treated +/// as a literal. /// /// Some known edge cases are: /// * \p "]" is allowed as the first character in a character class, i.e., @@ -45,9 +45,8 @@ namespace llvm { /// * \p "}" and \p "," that are not inside a brace expansion are taken as /// literals, e.g., \p ",}" is valid but \p "{" is not. /// -/// -/// For example, \p "*[/\\]foo.{c,cpp}" will match (unix or windows) paths to -/// all files named \p "foo.c" or \p "foo.cpp". +/// For example, \p "*[/\\\\]foo.{c,cpp}" (with two backslashes) will match +/// (unix or windows) paths to all files named \p "foo.c" or \p "foo.cpp". class GlobPattern { public: /// \param Pat the pattern to match against diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h index 2570c826502e..34f91cbe9551 100644 --- a/llvm/include/llvm/Support/raw_ostream.h +++ b/llvm/include/llvm/Support/raw_ostream.h @@ -769,6 +769,25 @@ public: ~buffer_unique_ostream() override { *OS << str(); } }; +// Helper struct to add indentation to raw_ostream. Instead of +// OS.indent(6) << "more stuff"; +// you can use +// OS << indent(6) << "more stuff"; +// which has better ergonomics (and clang-formats better as well). 
+struct indent { + unsigned NumSpaces; + + explicit indent(unsigned NumSpaces) : NumSpaces(NumSpaces) {} + void operator+=(unsigned N) { NumSpaces += N; } + void operator-=(unsigned N) { NumSpaces -= N; } + indent operator+(unsigned N) const { return indent(NumSpaces + N); } + indent operator-(unsigned N) const { return indent(NumSpaces - N); } +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const indent &Indent) { + return OS.indent(Indent.NumSpaces); +} + class Error; /// This helper creates an output stream and then passes it to \p Write. diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h index c9e01e3f221b..5348c1177f63 100644 --- a/llvm/include/llvm/TableGen/Record.h +++ b/llvm/include/llvm/TableGen/Record.h @@ -603,6 +603,7 @@ public: Init *convertInitializerTo(RecTy *Ty) const override; Init *convertInitializerBitRange(ArrayRef<unsigned> Bits) const override; + std::optional<int64_t> convertInitializerToInt() const; bool isComplete() const override { for (unsigned i = 0; i != getNumBits(); ++i) @@ -2035,7 +2036,7 @@ public: } /// Start timing a phase. Automatically stops any previous phase timer. - void startTimer(StringRef Name); + void startTimer(StringRef Name) const; /// Stop timing a phase. void stopTimer(); @@ -2109,12 +2110,13 @@ private: mutable std::map<std::string, std::vector<Record *>> ClassRecordsMap; GlobalMap ExtraGlobals; + // TODO: Move timing related code out of RecordKeeper. // These members are for the phase timing feature. We need a timer group, // the last timer started, and a flag to say whether the last timer // is the special "backend overall timer." - TimerGroup *TimingGroup = nullptr; - Timer *LastTimer = nullptr; - bool BackendTimer = false; + mutable TimerGroup *TimingGroup = nullptr; + mutable Timer *LastTimer = nullptr; + mutable bool BackendTimer = false; /// The internal uniquer implementation of the RecordKeeper. 
std::unique_ptr<detail::RecordKeeperImpl> Impl; diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h index 70739709a810..4b29d3f40ab7 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h @@ -270,13 +270,20 @@ public: // A map from destination modules to lists of imports. class ImportListsTy { public: - ImportListsTy() = default; - ImportListsTy(size_t Size) : ListsImpl(Size) {} + ImportListsTy() : EmptyList(ImportIDs) {} + ImportListsTy(size_t Size) : EmptyList(ImportIDs), ListsImpl(Size) {} ImportMapTy &operator[](StringRef DestMod) { return ListsImpl.try_emplace(DestMod, ImportIDs).first->second; } + const ImportMapTy &lookup(StringRef DestMod) const { + auto It = ListsImpl.find(DestMod); + if (It != ListsImpl.end()) + return It->second; + return EmptyList; + } + size_t size() const { return ListsImpl.size(); } using const_iterator = DenseMap<StringRef, ImportMapTy>::const_iterator; @@ -284,6 +291,7 @@ public: const_iterator end() const { return ListsImpl.end(); } private: + ImportMapTy EmptyList; DenseMap<StringRef, ImportMapTy> ListsImpl; ImportIDTable ImportIDs; }; diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h index 2e7a0ec29ed9..2d3d2ada6183 100644 --- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -206,6 +206,7 @@ private: Value *optimizeFMinFMax(CallInst *CI, IRBuilderBase &B); Value *optimizeLog(CallInst *CI, IRBuilderBase &B); Value *optimizeSqrt(CallInst *CI, IRBuilderBase &B); + Value *optimizeFMod(CallInst *CI, IRBuilderBase &B); Value *mergeSqrtToExp(CallInst *CI, IRBuilderBase &B); Value *optimizeSinCosPi(CallInst *CI, bool IsSin, IRBuilderBase &B); Value *optimizeTrigInversionPairs(CallInst *CI, IRBuilderBase &B); diff --git 
a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h new file mode 100644 index 000000000000..78c1c0e4c046 --- /dev/null +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h @@ -0,0 +1,62 @@ +//===- Legality.h -----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Legality checks for the Sandbox Vectorizer. +// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_LEGALITY_H +#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_LEGALITY_H + +#include "llvm/SandboxIR/SandboxIR.h" + +namespace llvm::sandboxir { + +class LegalityAnalysis; + +enum class LegalityResultID { + Widen, ///> Vectorize by combining scalars to a vector. +}; + +/// The legality outcome is represented by a class rather than an enum class +/// because in some cases the legality checks are expensive and look for a +/// particular instruction that can be passed along to the vectorizer to avoid +/// repeating the same expensive computation. +class LegalityResult { +protected: + LegalityResultID ID; + /// Only Legality can create LegalityResults. + LegalityResult(LegalityResultID ID) : ID(ID) {} + friend class LegalityAnalysis; + +public: + LegalityResultID getSubclassID() const { return ID; } +}; + +class Widen final : public LegalityResult { + friend class LegalityAnalysis; + Widen() : LegalityResult(LegalityResultID::Widen) {} + +public: + static bool classof(const LegalityResult *From) { + return From->getSubclassID() == LegalityResultID::Widen; + } +}; + +/// Performs the legality analysis and returns a LegalityResult object. 
+class LegalityAnalysis { +public: + LegalityAnalysis() = default; + LegalityResult canVectorize(ArrayRef<Value *> Bndl) { + // TODO: For now everything is legal. + return Widen(); + } +}; + +} // namespace llvm::sandboxir + +#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_LEGALITY_H diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h index 5b3d1a50aa1e..99582e3e0e02 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h @@ -12,11 +12,18 @@ #ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_BOTTOMUPVEC_H #define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_BOTTOMUPVEC_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/SandboxIR/Pass.h" +#include "llvm/SandboxIR/SandboxIR.h" +#include "llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h" namespace llvm::sandboxir { class BottomUpVec final : public FunctionPass { + bool Change = false; + LegalityAnalysis Legality; + void vectorizeRec(ArrayRef<Value *> Bndl); + void tryVectorize(ArrayRef<Value *> Seeds); public: BottomUpVec() : FunctionPass("bottom-up-vec") {} diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Region.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Region.h new file mode 100644 index 000000000000..2f893bac213a --- /dev/null +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Region.h @@ -0,0 +1,104 @@ +//===- Region.h -------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_REGION_H +#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_REGION_H + +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/SandboxIR/SandboxIR.h" +#include "llvm/Support/InstructionCost.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm::sandboxir { + +/// The main job of the Region is to point to new instructions generated by +/// vectorization passes. It is the unit that RegionPasses operate on with their +/// runOnRegion() function. +/// +/// The region allows us to stack transformations horizontally, meaning that +/// each transformation operates on a single region and the resulting region is +/// the input to the next transformation, as opposed to vertically, which is the +/// common way of applying a transformation across the whole function. This +/// enables us to check for profitability and decide whether we accept or +/// rollback at a region granularity, which is much better than doing this at +/// the function level. +/// +// Traditional approach: transformations applied vertically for the whole +// function +// F +// +----+ +// | | +// | | +// | | -> Transform1 -> ... -> TransformN -> Check Cost +// | | +// | | +// +----+ +// +// Region-based approach: transformations applied horizontally, for each Region +// F +// +----+ +// |Rgn1| -> Transform1 -> ... -> TransformN -> Check Cost +// | | +// |Rgn2| -> Transform1 -> ... -> TransformN -> Check Cost +// | | +// |Rgn3| -> Transform1 -> ... -> TransformN -> Check Cost +// +----+ + +class Region { + /// All the instructions in the Region. Only new instructions generated during + /// vectorization are part of the Region. + SetVector<Instruction *> Insts; + + /// A unique ID, used for debugging. 
+ unsigned RegionID = 0; + + Context &Ctx; + + // TODO: Add cost modeling. + // TODO: Add a way to encode/decode region info to/from metadata. + +public: + Region(Context &Ctx); + ~Region(); + + Context &getContext() const { return Ctx; } + /// Returns the region's unique ID. + unsigned getID() const { return RegionID; } + + /// Adds I to the set. + void add(Instruction *I); + /// Removes I from the set. + void remove(Instruction *I); + /// Returns true if I is in the Region. + bool contains(Instruction *I) const { return Insts.contains(I); } + /// Returns true if the Region has no instructions. + bool empty() const { return Insts.empty(); } + + using iterator = decltype(Insts.begin()); + iterator begin() { return Insts.begin(); } + iterator end() { return Insts.end(); } + iterator_range<iterator> insts() { return make_range(begin(), end()); } + +#ifndef NDEBUG + /// This is an expensive check, meant for testing. + bool operator==(const Region &Other) const; + bool operator!=(const Region &other) const { return !(*this == other); } + + void dump(raw_ostream &OS) const; + void dump() const; + friend raw_ostream &operator<<(raw_ostream &OS, const Region &Rgn) { + Rgn.dump(OS); + return OS; + } +#endif +}; + +} // namespace llvm::sandboxir + +#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_REGION_H diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index a88469ab81a8..957ac883490c 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -93,20 +93,26 @@ static bool isDereferenceableAndAlignedPointer( Visited, MaxDepth); } - bool CheckForNonNull, CheckForFreed; - APInt KnownDerefBytes(Size.getBitWidth(), - V->getPointerDereferenceableBytes(DL, CheckForNonNull, - CheckForFreed)); - if (KnownDerefBytes.getBoolValue() && KnownDerefBytes.uge(Size) && - !CheckForFreed) - if (!CheckForNonNull || - isKnownNonZero(V, SimplifyQuery(DL, DT, AC, CtxI))) { - // As we recursed through GEPs to get here, we've incrementally checked - // 
that each step advanced by a multiple of the alignment. If our base is - // properly aligned, then the original offset accessed must also be. - APInt Offset(DL.getTypeStoreSizeInBits(V->getType()), 0); - return isAligned(V, Offset, Alignment, DL); - } + auto IsKnownDeref = [&]() { + bool CheckForNonNull, CheckForFreed; + APInt KnownDerefBytes(Size.getBitWidth(), + V->getPointerDereferenceableBytes(DL, CheckForNonNull, + CheckForFreed)); + if (!KnownDerefBytes.getBoolValue() || !KnownDerefBytes.uge(Size) || + CheckForFreed) + return false; + if (CheckForNonNull && + !isKnownNonZero(V, SimplifyQuery(DL, DT, AC, CtxI))) + return false; + return true; + }; + if (IsKnownDeref()) { + // As we recursed through GEPs to get here, we've incrementally checked + // that each step advanced by a multiple of the alignment. If our base is + // properly aligned, then the original offset accessed must also be. + APInt Offset(DL.getTypeStoreSizeInBits(V->getType()), 0); + return isAligned(V, Offset, Alignment, DL); + } /// TODO refactor this function to be able to search independently for /// Dereferencability and Alignment requirements. 
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 5710bda2b2cf..07c189344c64 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -838,7 +838,7 @@ void IRTranslator::splitWorkItem(SwitchCG::SwitchWorkList &WorkList, void IRTranslator::emitJumpTable(SwitchCG::JumpTable &JT, MachineBasicBlock *MBB) { // Emit the code for the jump table - assert(JT.Reg != -1U && "Should lower JT Header first!"); + assert(JT.Reg && "Should lower JT Header first!"); MachineIRBuilder MIB(*MBB->getParent()); MIB.setMBB(*MBB); MIB.setDebugLoc(CurBuilder->getDebugLoc()); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 01e47bd2fb40..e64d3f51a011 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -485,6 +485,10 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { RTLIBCASE(RINT_F); case TargetOpcode::G_FNEARBYINT: RTLIBCASE(NEARBYINT_F); + case TargetOpcode::G_INTRINSIC_TRUNC: + RTLIBCASE(TRUNC_F); + case TargetOpcode::G_INTRINSIC_ROUND: + RTLIBCASE(ROUND_F); case TargetOpcode::G_INTRINSIC_ROUNDEVEN: RTLIBCASE(ROUNDEVEN_F); case TargetOpcode::G_INTRINSIC_LRINT: @@ -1215,6 +1219,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { case TargetOpcode::G_FSQRT: case TargetOpcode::G_FRINT: case TargetOpcode::G_FNEARBYINT: + case TargetOpcode::G_INTRINSIC_TRUNC: + case TargetOpcode::G_INTRINSIC_ROUND: case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); unsigned Size = LLTy.getSizeInBits(); diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 6eed73c15f09..1fcbeeec6f64 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1739,7 +1739,7 @@ void 
MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { } if (IndexOp.getImm() != 0 && - Src1Ty.getElementCount().getKnownMinValue() % IndexOp.getImm() != 0) { + IndexOp.getImm() % Src1Ty.getElementCount().getKnownMinValue() != 0) { report("Index must be a multiple of the second source vector's " "minimum vector length", MI); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 325aba461e80..2c81c829e75c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -3474,7 +3474,7 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) { assert(Res.getNode() != N && "Expected a new node!"); - assert(Res.getValueType() == N->getValueType(0) && + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && "Invalid operand expansion"); ReplaceValueWith(SDValue(N, 0), Res); @@ -3544,7 +3544,8 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) { Op = DAG.getNode(N->getOpcode(), dl, {RVT, MVT::Other}, {Op.getValue(1), Op}); ReplaceValueWith(SDValue(N, 1), Op.getValue(1)); - return Op; + ReplaceValueWith(SDValue(N, 0), Op); + return SDValue(); } SDValue Res = DAG.getNode(GetPromotionOpcode(SVT, RVT), dl, NVT, Op); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 44ec6f7cab14..3918da3ef031 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -760,7 +760,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock()); break; case ISD::Register: - ID.AddInteger(cast<RegisterSDNode>(N)->getReg()); + ID.AddInteger(cast<RegisterSDNode>(N)->getReg().id()); break; case ISD::RegisterMask: ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask()); @@ -2292,16 +2292,16 @@ SDValue 
SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, MaskVec); } -SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { +SDValue SelectionDAG::getRegister(Register Reg, EVT VT) { SDVTList VTs = getVTList(VT); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::Register, VTs, std::nullopt); - ID.AddInteger(RegNo); + ID.AddInteger(Reg.id()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - auto *N = newSDNode<RegisterSDNode>(RegNo, VTs); + auto *N = newSDNode<RegisterSDNode>(Reg, VTs); N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, UA); CSEMap.InsertNode(N, IP); InsertNode(N); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1dbcf8fd7651..eec89f04c635 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -845,13 +845,13 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, } } -RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, +RegsForValue::RegsForValue(const SmallVector<Register, 4> &regs, MVT regvt, EVT valuevt, std::optional<CallingConv::ID> CC) : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs), RegCount(1, regs.size()), CallConv(CC) {} RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, - const DataLayout &DL, unsigned Reg, Type *Ty, + const DataLayout &DL, Register Reg, Type *Ty, std::optional<CallingConv::ID> CC) { ComputeValueVTs(TLI, DL, Ty, ValueVTs); @@ -870,7 +870,7 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, Regs.push_back(Reg + i); RegVTs.push_back(RegisterVT); RegCount.push_back(NumRegs); - Reg += NumRegs; + Reg = Reg.id() + NumRegs; } } @@ -1070,9 +1070,9 @@ void RegsForValue::AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching, } } -SmallVector<std::pair<unsigned, 
TypeSize>, 4> +SmallVector<std::pair<Register, TypeSize>, 4> RegsForValue::getRegsAndSizes() const { - SmallVector<std::pair<unsigned, TypeSize>, 4> OutVec; + SmallVector<std::pair<Register, TypeSize>, 4> OutVec; unsigned I = 0; for (auto CountAndVT : zip_first(RegCount, RegVTs)) { unsigned RegCount = std::get<0>(CountAndVT); @@ -2183,7 +2183,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { } if (!FuncInfo.CanLowerReturn) { - unsigned DemoteReg = FuncInfo.DemoteRegister; + Register DemoteReg = FuncInfo.DemoteRegister; const Function *F = I.getParent()->getParent(); // Emit a store of the return value through the virtual register. @@ -2981,7 +2981,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) { // Emit the code for the jump table assert(JT.SL && "Should set SDLoc for SelectionDAG!"); - assert(JT.Reg != -1U && "Should lower JT Header first!"); + assert(JT.Reg && "Should lower JT Header first!"); EVT PTy = DAG.getTargetLoweringInfo().getJumpTableRegTy(DAG.getDataLayout()); SDValue Index = DAG.getCopyFromReg(getControlRoot(), *JT.SL, JT.Reg, PTy); SDValue Table = DAG.getJumpTable(JT.JTI, PTy); @@ -3013,7 +3013,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT, SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getJumpTableRegTy(DAG.getDataLayout())); - unsigned JumpTableReg = + Register JumpTableReg = FuncInfo.CreateReg(TLI.getJumpTableRegTy(DAG.getDataLayout())); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, JumpTableReg, SwitchOp); @@ -3261,10 +3261,9 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, /// visitBitTestCase - this function produces one "bit test" void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, - MachineBasicBlock* NextMBB, + MachineBasicBlock *NextMBB, BranchProbability BranchProbToNext, - unsigned Reg, - BitTestCase &B, + Register Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { SDLoc dl = 
getCurSDLoc(); MVT VT = BB.RegVT; @@ -5956,7 +5955,7 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL, // getUnderlyingArgRegs - Find underlying registers used for a truncated, // bitcasted, or split argument. Returns a list of <Register, size in bits> static void -getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, TypeSize>> &Regs, +getUnderlyingArgRegs(SmallVectorImpl<std::pair<Register, TypeSize>> &Regs, const SDValue &N) { switch (N.getOpcode()) { case ISD::CopyFromReg: { @@ -6101,7 +6100,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (FI != std::numeric_limits<int>::max()) Op = MachineOperand::CreateFI(FI); - SmallVector<std::pair<unsigned, TypeSize>, 8> ArgRegsAndSizes; + SmallVector<std::pair<Register, TypeSize>, 8> ArgRegsAndSizes; if (!Op && N.getNode()) { getUnderlyingArgRegs(ArgRegsAndSizes, N); Register Reg; @@ -6131,7 +6130,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (!Op) { // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg - auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>> + auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<Register, TypeSize>> SplitRegs) { unsigned Offset = 0; for (const auto &RegAndSize : SplitRegs) { @@ -7748,7 +7747,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0)); MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); - unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC); + Register VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC); SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), sdl, VReg, PtrVT); if (Intrinsic == Intrinsic::eh_exceptioncode) N = DAG.getZExtOrTrunc(N, sdl, MVT::i32); @@ -9653,7 +9652,7 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); MachineFunction &MF = DAG.getMachineFunction(); - 
SmallVector<unsigned, 4> Regs; + SmallVector<Register, 4> Regs; const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); // No work to do for memory/address operands. @@ -10078,7 +10077,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, return; } - SmallVector<unsigned, 4> Regs; + SmallVector<Register, 4> Regs; MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); @@ -11817,8 +11816,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Update the SwiftErrorVRegDefMap. if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) { - unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); - if (Register::isVirtualRegister(Reg)) + Register Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); + if (Reg.isVirtual()) SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(), Reg); } @@ -11829,8 +11828,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // If we can, though, try to skip creating an unnecessary vreg. // FIXME: This isn't very clean... it would be nice to make this more // general. - unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); - if (Register::isVirtualRegister(Reg)) { + Register Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); + if (Reg.isVirtual()) { FuncInfo->ValueMap[&Arg] = Reg; continue; } @@ -12654,7 +12653,7 @@ void SelectionDAGBuilder::visitCallBrLandingPad(const CallInst &I) { // getRegistersForValue may produce 1 to many registers based on whether // the OpInfo.ConstraintVT is legal on the target or not. 
- for (unsigned &Reg : OpInfo.AssignedRegs.Regs) { + for (Register &Reg : OpInfo.AssignedRegs.Regs) { Register OriginalDef = FollowCopyChain(MRI, InitialDef++); if (Register::isPhysicalRegister(OriginalDef)) FuncInfo.MBB->addLiveIn(OriginalDef); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index b13a2df7b48e..3f8a3e7ffb65 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -526,7 +526,7 @@ public: void visitBitTestHeader(SwitchCG::BitTestBlock &B, MachineBasicBlock *SwitchBB); void visitBitTestCase(SwitchCG::BitTestBlock &BB, MachineBasicBlock *NextMBB, - BranchProbability BranchProbToNext, unsigned Reg, + BranchProbability BranchProbToNext, Register Reg, SwitchCG::BitTestCase &B, MachineBasicBlock *SwitchBB); void visitJumpTable(SwitchCG::JumpTable &JT); void visitJumpTableHeader(SwitchCG::JumpTable &JT, @@ -740,7 +740,7 @@ struct RegsForValue { /// This list holds the registers assigned to the values. /// Each legal or promoted value requires one register, and each /// expanded value requires multiple registers. - SmallVector<unsigned, 4> Regs; + SmallVector<Register, 4> Regs; /// This list holds the number of registers for each value. SmallVector<unsigned, 4> RegCount; @@ -750,10 +750,10 @@ struct RegsForValue { std::optional<CallingConv::ID> CallConv; RegsForValue() = default; - RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt, + RegsForValue(const SmallVector<Register, 4> &regs, MVT regvt, EVT valuevt, std::optional<CallingConv::ID> CC = std::nullopt); RegsForValue(LLVMContext &Context, const TargetLowering &TLI, - const DataLayout &DL, unsigned Reg, Type *Ty, + const DataLayout &DL, Register Reg, Type *Ty, std::optional<CallingConv::ID> CC); bool isABIMangled() const { return CallConv.has_value(); } @@ -796,7 +796,7 @@ struct RegsForValue { } /// Return a list of registers and their sizes. 
- SmallVector<std::pair<unsigned, TypeSize>, 4> getRegsAndSizes() const; + SmallVector<std::pair<Register, TypeSize>, 4> getRegsAndSizes() const; }; } // end namespace llvm diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 263a213bd4f6..2a97580942df 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -895,8 +895,8 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { if (N->getOpcode() != ISD::CopyToReg) continue; - unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); - if (!Register::isVirtualRegister(DestReg)) + Register DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); + if (!DestReg.isVirtual()) continue; // Ignore non-integer values. diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp index e741a0fc49fb..038c499fe236 100644 --- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp +++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp @@ -254,7 +254,7 @@ bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters, ->createJumpTableIndex(Table); // Set up the jump table info. 
- JumpTable JT(-1U, JTI, JumpTableMBB, nullptr, SL); + JumpTable JT(Register(), JTI, JumpTableMBB, nullptr, SL); JumpTableHeader JTH(Clusters[First].Low->getValue(), Clusters[Last].High->getValue(), SI->getCondition(), nullptr, false); @@ -455,7 +455,7 @@ bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters, BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb)); } BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange), - SI->getCondition(), -1U, MVT::Other, false, + SI->getCondition(), Register(), MVT::Other, false, ContiguousRange, nullptr, nullptr, std::move(BTI), TotalProb); diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp index c5835e8c2e98..aa65f3be29da 100644 --- a/llvm/lib/Demangle/MicrosoftDemangle.cpp +++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp @@ -2035,6 +2035,10 @@ Demangler::demanglePrimitiveType(std::string_view &MangledName) { return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16); case 'U': return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char32); + case 'P': + return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Auto); + case 'T': + return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::DecltypeAuto); } break; } diff --git a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp index 9a9c34ec6d34..ec6e67058c68 100644 --- a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp +++ b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp @@ -149,6 +149,8 @@ void PrimitiveTypeNode::outputPre(OutputBuffer &OB, OutputFlags Flags) const { OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Double, "double"); OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Ldouble, "long double"); OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Nullptr, "std::nullptr_t"); + OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Auto, "auto"); + OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, DecltypeAuto, "decltype(auto)"); } outputQualifiers(OB, Quals, true, false); } diff --git 
a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 66e52fe2d08f..9eff35642c9f 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -2322,7 +2322,7 @@ bool AsmParser::parseAndMatchAndEmitTargetInstruction(ParseStatementInfo &Info, // Canonicalize the opcode to lower case. std::string OpcodeStr = IDVal.lower(); ParseInstructionInfo IInfo(Info.AsmRewrites); - bool ParseHadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID, + bool ParseHadError = getTargetParser().parseInstruction(IInfo, OpcodeStr, ID, Info.ParsedOperands); Info.ParseError = ParseHadError; @@ -2379,7 +2379,7 @@ bool AsmParser::parseAndMatchAndEmitTargetInstruction(ParseStatementInfo &Info, // If parsing succeeded, match the instruction. if (!ParseHadError) { uint64_t ErrorInfo; - if (getTargetParser().MatchAndEmitInstruction( + if (getTargetParser().matchAndEmitInstruction( IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo, getTargetParser().isParsingMSInlineAsm())) return true; @@ -6029,7 +6029,7 @@ bool AsmParser::parseMSInlineAsm( // Register operand. if (Operand.isReg() && !Operand.needAddressOf() && - !getTargetParser().OmitRegisterFromClobberLists(Operand.getReg())) { + !getTargetParser().omitRegisterFromClobberLists(Operand.getReg())) { unsigned NumDefs = Desc.getNumDefs(); // Clobber. if (NumDefs && Operand.getMCOperandNum() < NumDefs) diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 9f619c5018b5..0c64af9e460e 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -2657,7 +2657,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, // Canonicalize the opcode to lower case. 
std::string OpcodeStr = IDVal.lower(); ParseInstructionInfo IInfo(Info.AsmRewrites); - bool ParseHadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID, + bool ParseHadError = getTargetParser().parseInstruction(IInfo, OpcodeStr, ID, Info.ParsedOperands); Info.ParseError = ParseHadError; @@ -2714,7 +2714,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, // If parsing succeeded, match the instruction. if (!ParseHadError) { uint64_t ErrorInfo; - if (getTargetParser().MatchAndEmitInstruction( + if (getTargetParser().matchAndEmitInstruction( IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo, getTargetParser().isParsingMSInlineAsm())) return true; @@ -7389,7 +7389,7 @@ bool MasmParser::parseMSInlineAsm( // Register operand. if (Operand.isReg() && !Operand.needAddressOf() && - !getTargetParser().OmitRegisterFromClobberLists(Operand.getReg())) { + !getTargetParser().omitRegisterFromClobberLists(Operand.getReg())) { unsigned NumDefs = Desc.getNumDefs(); // Clobber. 
if (NumDefs && Operand.getMCOperandNum() < NumDefs) diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp index c26ba1983db9..f8faef5a386a 100644 --- a/llvm/lib/SandboxIR/SandboxIR.cpp +++ b/llvm/lib/SandboxIR/SandboxIR.cpp @@ -306,6 +306,11 @@ BBIterator &BBIterator::operator--() { return *this; } +BasicBlock *BBIterator::getNodeParent() const { + llvm::BasicBlock *Parent = const_cast<BBIterator *>(this)->It.getNodeParent(); + return cast<BasicBlock>(Ctx->getValue(Parent)); +} + const char *Instruction::getOpcodeName(Opcode Opc) { switch (Opc) { #define OP(OPC) \ @@ -2534,6 +2539,8 @@ template class GlobalWithNodeAPI<Function, llvm::Function, GlobalObject, llvm::GlobalObject>; template class GlobalWithNodeAPI<GlobalVariable, llvm::GlobalVariable, GlobalObject, llvm::GlobalObject>; +template class GlobalWithNodeAPI<GlobalAlias, llvm::GlobalAlias, GlobalValue, + llvm::GlobalValue>; } // namespace llvm::sandboxir void GlobalIFunc::setResolver(Constant *Resolver) { @@ -2587,6 +2594,24 @@ void GlobalVariable::setExternallyInitialized(bool V) { cast<llvm::GlobalVariable>(Val)->setExternallyInitialized(V); } +void GlobalAlias::setAliasee(Constant *Aliasee) { + Ctx.getTracker() + .emplaceIfTracking< + GenericSetter<&GlobalAlias::getAliasee, &GlobalAlias::setAliasee>>( + this); + cast<llvm::GlobalAlias>(Val)->setAliasee(cast<llvm::Constant>(Aliasee->Val)); +} + +Constant *GlobalAlias::getAliasee() const { + return cast<Constant>( + Ctx.getOrCreateConstant(cast<llvm::GlobalAlias>(Val)->getAliasee())); +} + +const GlobalObject *GlobalAlias::getAliaseeObject() const { + return cast<GlobalObject>(Ctx.getOrCreateConstant( + cast<llvm::GlobalAlias>(Val)->getAliaseeObject())); +} + void GlobalValue::setUnnamedAddr(UnnamedAddr V) { Ctx.getTracker() .emplaceIfTracking<GenericSetter<&GlobalValue::getUnnamedAddr, @@ -2803,6 +2828,10 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) { It->second = std::unique_ptr<GlobalVariable>( 
new GlobalVariable(cast<llvm::GlobalVariable>(C), *this)); break; + case llvm::Value::GlobalAliasVal: + It->second = std::unique_ptr<GlobalAlias>( + new GlobalAlias(cast<llvm::GlobalAlias>(C), *this)); + break; default: It->second = std::unique_ptr<Constant>(new Constant(C, *this)); break; diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index 97ae0b092b81..ff2da3badb36 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -497,18 +497,24 @@ Init *BitsInit::convertInitializerTo(RecTy *Ty) const { } if (isa<IntRecTy>(Ty)) { - int64_t Result = 0; - for (unsigned i = 0, e = getNumBits(); i != e; ++i) - if (auto *Bit = dyn_cast<BitInit>(getBit(i))) - Result |= static_cast<int64_t>(Bit->getValue()) << i; - else - return nullptr; - return IntInit::get(getRecordKeeper(), Result); + std::optional<int64_t> Result = convertInitializerToInt(); + if (Result) + return IntInit::get(getRecordKeeper(), *Result); } return nullptr; } +std::optional<int64_t> BitsInit::convertInitializerToInt() const { + int64_t Result = 0; + for (unsigned i = 0, e = getNumBits(); i != e; ++i) + if (auto *Bit = dyn_cast<BitInit>(getBit(i))) + Result |= static_cast<int64_t>(Bit->getValue()) << i; + else + return std::nullopt; + return Result; +} + Init * BitsInit::convertInitializerBitRange(ArrayRef<unsigned> Bits) const { SmallVector<Init *, 16> NewBits(Bits.size()); @@ -3219,7 +3225,7 @@ Init *RecordKeeper::getNewAnonymousName() { // These functions implement the phase timing facility. Starting a timer // when one is already running stops the running one. 
-void RecordKeeper::startTimer(StringRef Name) { +void RecordKeeper::startTimer(StringRef Name) const { if (TimingGroup) { if (LastTimer && LastTimer->isRunning()) { LastTimer->stopTimer(); diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td index 8a7d2af34498..737fc7390455 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td @@ -836,9 +836,11 @@ def : InstRW<[N2Write_3c_1V], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>; def : SchedAlias<WriteFCvt, N2Write_3c_1V0>; // FP move, immed -// FP move, register def : SchedAlias<WriteFImm, N2Write_2c_1V>; +// FP move, register +def : InstRW<[N2Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>; + // FP transfer, from gen to low half of vec reg def : InstRW<[N2Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr, FMOVHWr, FMOVHXr, FMOVSWr, FMOVDXr)>; @@ -858,9 +860,8 @@ def : InstRW<[N2Write_6c_1L], (instregex "^LDR[SDQ]l$", "^LDUR[BHSDQ]i$")>; // Load vector reg, immed post-index -def : InstRW<[N2Write_6c_1I_1L, WriteI], (instregex "^LDR[BHSDQ]post$")>; // Load vector reg, immed pre-index -def : InstRW<[WriteAdr, N2Write_6c_1I_1L], (instregex "^LDR[BHSDQ]pre$")>; +def : InstRW<[WriteAdr, N2Write_6c_1I_1L], (instregex "^LDR[BHSDQ](post|pre)$")>; // Load vector reg, unsigned immed def : InstRW<[N2Write_6c_1L], (instregex "^LDR[BHSDQ]ui$")>; @@ -1119,7 +1120,7 @@ def : InstRW<[N2Write_5c_1V], (instregex "^FMLALv", "^FMLSLv")>; // ASIMD FP round, D-form F32 and Q-form F64 def : InstRW<[N2Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]v2f(32|64)$", - "^FRINT[32|64)[XZ]v2f(32|64)$")>; + "^FRINT(32|64)[XZ]v2f(32|64)$")>; // ASIMD FP round, D-form F16 and Q-form F32 def : InstRW<[N2Write_4c_2V0], diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 13a7eef47885..4f6131fd8355 100644 --- 
a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -231,12 +231,12 @@ private: bool validateInstruction(MCInst &Inst, SMLoc &IDLoc, SmallVectorImpl<SMLoc> &Loc); unsigned getNumRegsForRegKind(RegKind K); - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override; -/// @name Auto-generated Match Functions -/// { + /// @name Auto-generated Match Functions + /// { #define GET_ASSEMBLER_HEADER #include "AArch64GenAsmMatcher.inc" @@ -321,7 +321,7 @@ public: bool areEqualRegs(const MCParsedAsmOperand &Op1, const MCParsedAsmOperand &Op2) const override; - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, @@ -5086,9 +5086,8 @@ bool AArch64AsmParser::areEqualRegs(const MCParsedAsmOperand &Op1, return false; } -/// ParseInstruction - Parse an AArch64 instruction mnemonic followed by its -/// operands. -bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info, +/// Parse an AArch64 instruction mnemonic followed by its operands. 
+bool AArch64AsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { Name = StringSwitch<StringRef>(Name.lower()) @@ -6205,7 +6204,7 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode, static const char *getSubtargetFeatureName(uint64_t Val); -bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, +bool AArch64AsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index 9f8926432d00..e8674c4c7759 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -640,27 +640,38 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { break; } case Intrinsic::amdgcn_cvt_pkrtz: { - Value *Src0 = II.getArgOperand(0); - Value *Src1 = II.getArgOperand(1); - if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) { - if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) { - const fltSemantics &HalfSem = - II.getType()->getScalarType()->getFltSemantics(); + auto foldFPTruncToF16RTZ = [](Value *Arg) -> Value * { + Type *HalfTy = Type::getHalfTy(Arg->getContext()); + + if (isa<PoisonValue>(Arg)) + return PoisonValue::get(HalfTy); + if (isa<UndefValue>(Arg)) + return UndefValue::get(HalfTy); + + ConstantFP *CFP = nullptr; + if (match(Arg, m_ConstantFP(CFP))) { bool LosesInfo; - APFloat Val0 = C0->getValueAPF(); - APFloat Val1 = C1->getValueAPF(); - Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo); - Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo); - - Constant *Folded = - ConstantVector::get({ConstantFP::get(II.getContext(), Val0), - ConstantFP::get(II.getContext(), Val1)}); - return IC.replaceInstUsesWith(II, Folded); + APFloat Val(CFP->getValueAPF()); + 
Val.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, &LosesInfo); + return ConstantFP::get(HalfTy, Val); } - } - if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) { - return IC.replaceInstUsesWith(II, UndefValue::get(II.getType())); + Value *Src = nullptr; + if (match(Arg, m_FPExt(m_Value(Src)))) { + if (Src->getType()->isHalfTy()) + return Src; + } + + return nullptr; + }; + + if (Value *Src0 = foldFPTruncToF16RTZ(II.getArgOperand(0))) { + if (Value *Src1 = foldFPTruncToF16RTZ(II.getArgOperand(1))) { + Value *V = PoisonValue::get(II.getType()); + V = IC.Builder.CreateInsertElement(V, Src0, (uint64_t)0); + V = IC.Builder.CreateInsertElement(V, Src1, (uint64_t)1); + return IC.replaceInstUsesWith(II, V); + } } break; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index f2c9619cb827..bc771d4ef6c0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3076,11 +3076,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR: case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR: case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC: - case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC: { - applyDefaultMapping(OpdMapper); - executeInWaterfallLoop(B, MI, {2, 5}); - return; - } + case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC: case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD: case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN: case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: { diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 5db6c52d189e..bab3f8a08781 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1256,7 +1256,7 @@ class KernelScopeInfo { } void usesAgprAt(int i) { - // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction + // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction if 
(!hasMAIInsts(*MSTI)) return; @@ -1597,7 +1597,7 @@ public: unsigned checkTargetMatchPredicate(MCInst &Inst) override; unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind) override; - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override; @@ -1605,7 +1605,7 @@ public: ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic, OperandMode Mode = OperandMode_Default); StringRef parseMnemonicSuffix(StringRef Name); - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; //bool ProcessInstruction(MCInst &Inst); @@ -5288,7 +5288,7 @@ static bool isInvalidVOPDY(const OperandVector &Operands, return false; } -bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, +bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, @@ -6393,9 +6393,9 @@ static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID); -bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, - StringRef Name, - SMLoc NameLoc, OperandVector &Operands) { +bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info, + StringRef Name, SMLoc NameLoc, + OperandVector &Operands) { // Add the instruction mnemonic Name = parseMnemonicSuffix(Name); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 4a861f0c03a0..b197f38d054f 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -7443,98 +7443,49 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op, SDLoc SL(Op); EVT VT = Op.getValueType(); - if (VT == MVT::v4i16 || VT == 
MVT::v4f16 || VT == MVT::v8i16 || - VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { - EVT HalfVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(), - VT.getVectorNumElements() / 2); - MVT HalfIntVT = MVT::getIntegerVT(HalfVT.getSizeInBits()); + if (VT == MVT::v2f16 || VT == MVT::v2i16 || VT == MVT::v2bf16) { + assert(!Subtarget->hasVOP3PInsts() && "this should be legal"); - // Turn into pair of packed build_vectors. - // TODO: Special case for constants that can be materialized with s_mov_b64. - SmallVector<SDValue, 4> LoOps, HiOps; - for (unsigned I = 0, E = VT.getVectorNumElements() / 2; I != E; ++I) { - LoOps.push_back(Op.getOperand(I)); - HiOps.push_back(Op.getOperand(I + E)); - } - SDValue Lo = DAG.getBuildVector(HalfVT, SL, LoOps); - SDValue Hi = DAG.getBuildVector(HalfVT, SL, HiOps); - - SDValue CastLo = DAG.getNode(ISD::BITCAST, SL, HalfIntVT, Lo); - SDValue CastHi = DAG.getNode(ISD::BITCAST, SL, HalfIntVT, Hi); - - SDValue Blend = DAG.getBuildVector(MVT::getVectorVT(HalfIntVT, 2), SL, - { CastLo, CastHi }); - return DAG.getNode(ISD::BITCAST, SL, VT, Blend); - } + SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); - if (VT == MVT::v16i16 || VT == MVT::v16f16 || VT == MVT::v16bf16) { - EVT QuarterVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(), - VT.getVectorNumElements() / 4); - MVT QuarterIntVT = MVT::getIntegerVT(QuarterVT.getSizeInBits()); - - SmallVector<SDValue, 4> Parts[4]; - for (unsigned I = 0, E = VT.getVectorNumElements() / 4; I != E; ++I) { - for (unsigned P = 0; P < 4; ++P) - Parts[P].push_back(Op.getOperand(I + P * E)); - } - SDValue Casts[4]; - for (unsigned P = 0; P < 4; ++P) { - SDValue Vec = DAG.getBuildVector(QuarterVT, SL, Parts[P]); - Casts[P] = DAG.getNode(ISD::BITCAST, SL, QuarterIntVT, Vec); + // Avoid adding defined bits with the zero_extend. 
+ if (Hi.isUndef()) { + Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo); + SDValue ExtLo = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Lo); + return DAG.getNode(ISD::BITCAST, SL, VT, ExtLo); } - SDValue Blend = - DAG.getBuildVector(MVT::getVectorVT(QuarterIntVT, 4), SL, Casts); - return DAG.getNode(ISD::BITCAST, SL, VT, Blend); - } + Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Hi); + Hi = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Hi); - if (VT == MVT::v32i16 || VT == MVT::v32f16 || VT == MVT::v32bf16) { - EVT QuarterVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(), - VT.getVectorNumElements() / 8); - MVT QuarterIntVT = MVT::getIntegerVT(QuarterVT.getSizeInBits()); + SDValue ShlHi = DAG.getNode(ISD::SHL, SL, MVT::i32, Hi, + DAG.getConstant(16, SL, MVT::i32)); + if (Lo.isUndef()) + return DAG.getNode(ISD::BITCAST, SL, VT, ShlHi); - SmallVector<SDValue, 8> Parts[8]; - for (unsigned I = 0, E = VT.getVectorNumElements() / 8; I != E; ++I) { - for (unsigned P = 0; P < 8; ++P) - Parts[P].push_back(Op.getOperand(I + P * E)); - } - SDValue Casts[8]; - for (unsigned P = 0; P < 8; ++P) { - SDValue Vec = DAG.getBuildVector(QuarterVT, SL, Parts[P]); - Casts[P] = DAG.getNode(ISD::BITCAST, SL, QuarterIntVT, Vec); - } + Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo); + Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo); - SDValue Blend = - DAG.getBuildVector(MVT::getVectorVT(QuarterIntVT, 8), SL, Casts); - return DAG.getNode(ISD::BITCAST, SL, VT, Blend); + SDValue Or = DAG.getNode(ISD::OR, SL, MVT::i32, Lo, ShlHi); + return DAG.getNode(ISD::BITCAST, SL, VT, Or); } - assert(VT == MVT::v2f16 || VT == MVT::v2i16 || VT == MVT::v2bf16); - assert(!Subtarget->hasVOP3PInsts() && "this should be legal"); + // Split into 2-element chunks. 
+ const unsigned NumParts = VT.getVectorNumElements() / 2; + EVT PartVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(), 2); + MVT PartIntVT = MVT::getIntegerVT(PartVT.getSizeInBits()); - SDValue Lo = Op.getOperand(0); - SDValue Hi = Op.getOperand(1); - - // Avoid adding defined bits with the zero_extend. - if (Hi.isUndef()) { - Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo); - SDValue ExtLo = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Lo); - return DAG.getNode(ISD::BITCAST, SL, VT, ExtLo); + SmallVector<SDValue> Casts; + for (unsigned P = 0; P < NumParts; ++P) { + SDValue Vec = DAG.getBuildVector( + PartVT, SL, {Op.getOperand(P * 2), Op.getOperand(P * 2 + 1)}); + Casts.push_back(DAG.getNode(ISD::BITCAST, SL, PartIntVT, Vec)); } - Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Hi); - Hi = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Hi); - - SDValue ShlHi = DAG.getNode(ISD::SHL, SL, MVT::i32, Hi, - DAG.getConstant(16, SL, MVT::i32)); - if (Lo.isUndef()) - return DAG.getNode(ISD::BITCAST, SL, VT, ShlHi); - - Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo); - Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo); - - SDValue Or = DAG.getNode(ISD::OR, SL, MVT::i32, Lo, ShlHi); - return DAG.getNode(ISD::BITCAST, SL, VT, Or); + SDValue Blend = + DAG.getBuildVector(MVT::getVectorVT(PartIntVT, NumParts), SL, Casts); + return DAG.getNode(ISD::BITCAST, SL, VT, Blend); } bool @@ -10062,8 +10013,6 @@ SDValue SITargetLowering::bufferRsrcPtrToVector(SDValue MaybePointer, if (!MaybePointer.getValueType().isScalarInteger()) return MaybePointer; - SDLoc DL(MaybePointer); - SDValue Rsrc = DAG.getBitcast(MVT::v4i32, MaybePointer); return Rsrc; } diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index fd9fe1196b78..a56682726013 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -820,7 +820,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, 
Inst.getOpcode() != AMDGPU::DS_CONSUME && Inst.getOpcode() != AMDGPU::DS_ORDERED_COUNT) { for (const MachineOperand &Op : Inst.all_uses()) { - if (Op.isReg() && TRI->isVectorRegister(*MRI, Op.getReg())) + if (TRI->isVectorRegister(*MRI, Op.getReg())) setExpScore(&Inst, TRI, MRI, Op, CurrScore); } } @@ -872,7 +872,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, } } for (const MachineOperand &Op : Inst.all_uses()) { - if (Op.isReg() && TRI->isVectorRegister(*MRI, Op.getReg())) + if (TRI->isVectorRegister(*MRI, Op.getReg())) setExpScore(&Inst, TRI, MRI, Op, CurrScore); } } @@ -2327,7 +2327,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML, HasVMemStore = true; } for (const MachineOperand &Op : MI.all_uses()) { - if (!Op.isReg() || !TRI->isVectorRegister(*MRI, Op.getReg())) + if (!TRI->isVectorRegister(*MRI, Op.getReg())) continue; RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, Op); // Vgpr use diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index e4a679f6a3ef..30aa36be99c9 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6231,10 +6231,9 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB, return; Register DstReg = MRI.createVirtualRegister(DstRC); - auto Copy = BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op); - + auto Copy = + BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).addReg(OpReg); Op.setReg(DstReg); - Op.setSubReg(0); MachineInstr *Def = MRI.getVRegDef(OpReg); if (!Def) diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 42440bc36f24..fe26d6c2dd09 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1589,7 +1589,7 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTMSDB_UPD), ARM::SP) .addReg(ARM::SP) 
.add(predOps(ARMCC::AL)); - for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg) + for (unsigned Reg = ARM::S16; Reg <= ARM::S31; ++Reg) VPUSH.addReg(Reg); // Clear FP registers with a VSCCLRM. @@ -1794,7 +1794,7 @@ void ARMExpandPseudo::CMSERestoreFPRegsV81( BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDMSIA_UPD), ARM::SP) .addReg(ARM::SP) .add(predOps(ARMCC::AL)); - for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg) + for (unsigned Reg = ARM::S16; Reg <= ARM::S31; ++Reg) VPOP.addReg(Reg, RegState::Define); } } @@ -2044,13 +2044,14 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, static void CMSEPushCalleeSaves(const TargetInstrInfo &TII, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, int JumpReg, - const LivePhysRegs &LiveRegs, bool Thumb1Only) { + MachineBasicBlock::iterator MBBI, + Register JumpReg, const LivePhysRegs &LiveRegs, + bool Thumb1Only) { const DebugLoc &DL = MBBI->getDebugLoc(); if (Thumb1Only) { // push Lo and Hi regs separately MachineInstrBuilder PushMIB = BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); - for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) { + for (unsigned Reg = ARM::R4; Reg < ARM::R8; ++Reg) { PushMIB.addReg( Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef); } @@ -2062,7 +2063,8 @@ static void CMSEPushCalleeSaves(const TargetInstrInfo &TII, // memory, and allow us to later pop them with a single instructions. // FIXME: Could also use any of r0-r3 that are free (including in the // first PUSH above). 
- for (int LoReg = ARM::R7, HiReg = ARM::R11; LoReg >= ARM::R4; --LoReg) { + for (unsigned LoReg = ARM::R7, HiReg = ARM::R11; LoReg >= ARM::R4; + --LoReg) { if (JumpReg == LoReg) continue; BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg) @@ -2072,7 +2074,7 @@ static void CMSEPushCalleeSaves(const TargetInstrInfo &TII, } MachineInstrBuilder PushMIB2 = BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); - for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) { + for (unsigned Reg = ARM::R4; Reg < ARM::R8; ++Reg) { if (Reg == JumpReg) continue; PushMIB2.addReg(Reg, RegState::Kill); @@ -2082,7 +2084,7 @@ static void CMSEPushCalleeSaves(const TargetInstrInfo &TII, // the JumpReg), use r4 or r5, whichever is not JumpReg. It has already been // saved. if (JumpReg >= ARM::R4 && JumpReg <= ARM::R7) { - int LoReg = JumpReg == ARM::R4 ? ARM::R5 : ARM::R4; + Register LoReg = JumpReg == ARM::R4 ? ARM::R5 : ARM::R4; BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg) .addReg(ARM::R8, LiveRegs.contains(ARM::R8) ? 0 : RegState::Undef) .add(predOps(ARMCC::AL)); @@ -2095,7 +2097,7 @@ static void CMSEPushCalleeSaves(const TargetInstrInfo &TII, BuildMI(MBB, MBBI, DL, TII.get(ARM::t2STMDB_UPD), ARM::SP) .addReg(ARM::SP) .add(predOps(ARMCC::AL)); - for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg) { + for (unsigned Reg = ARM::R4; Reg < ARM::R12; ++Reg) { PushMIB.addReg( Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 
0 : RegState::Undef); } @@ -2125,7 +2127,7 @@ static void CMSEPopCalleeSaves(const TargetInstrInfo &TII, BuildMI(MBB, MBBI, DL, TII.get(ARM::t2LDMIA_UPD), ARM::SP) .addReg(ARM::SP) .add(predOps(ARMCC::AL)); - for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg) + for (unsigned Reg = ARM::R4; Reg < ARM::R12; ++Reg) PopMIB.addReg(Reg, RegState::Define); } } diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 3e3f134d3470..7d74f86c164f 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -712,7 +712,7 @@ public: bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; bool ParseDirective(AsmToken DirectiveID) override; @@ -723,7 +723,7 @@ public: checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) override; - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override; @@ -7051,7 +7051,7 @@ void removeVPTCondCode(OperandVector &Operands, unsigned &MnemonicOpsEndInd) { } /// Parse an arm instruction mnemonic followed by its operands. 
-bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, +bool ARMAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { MCAsmParser &Parser = getParser(); @@ -11350,7 +11350,7 @@ static std::string ARMMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID = 0); static const char *getSubtargetFeatureName(uint64_t Val); -bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, +bool ARMAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) { @@ -11427,7 +11427,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, llvm_unreachable("Implement any new match types added!"); } -/// parseDirective parses the arm specific directives +/// ParseDirective parses the arm specific directives bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { const MCContext::Environment Format = getContext().getObjectFileType(); bool IsMachO = Format == MCContext::IsMachO; @@ -12120,7 +12120,7 @@ bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) { return false; } -/// parseDirective +/// parseDirectivePad /// ::= .pad offset bool ARMAsmParser::parseDirectivePad(SMLoc L) { MCAsmParser &Parser = getParser(); diff --git a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp index 193722fa3561..b4971e43b48e 100644 --- a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp +++ b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp @@ -51,7 +51,7 @@ class AVRAsmParser : public MCTargetAsmParser { #define GET_ASSEMBLER_HEADER #include "AVRGenAsmMatcher.inc" - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override; @@ -60,7 +60,7 @@ class AVRAsmParser : public 
MCTargetAsmParser { ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; ParseStatus parseDirective(AsmToken DirectiveID) override; @@ -320,7 +320,7 @@ bool AVRAsmParser::emit(MCInst &Inst, SMLoc const &Loc, MCStreamer &Out) const { return false; } -bool AVRAsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode, +bool AVRAsmParser::matchAndEmitInstruction(SMLoc Loc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) { @@ -623,7 +623,7 @@ void AVRAsmParser::eatComma() { } } -bool AVRAsmParser::ParseInstruction(ParseInstructionInfo &Info, +bool AVRAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Mnemonic, SMLoc NameLoc, OperandVector &Operands) { Operands.push_back(AVROperand::CreateToken(Mnemonic, NameLoc)); diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp index 9672ed009e9b..06b7743e0cd3 100644 --- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp +++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp @@ -34,7 +34,7 @@ class BPFAsmParser : public MCTargetAsmParser { bool PreMatchCheck(OperandVector &Operands); - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override; @@ -43,7 +43,7 @@ class BPFAsmParser : public MCTargetAsmParser { ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; // "=" is used as assignment operator for assembly 
statment, so can't be used @@ -304,7 +304,7 @@ bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) { return false; } -bool BPFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, +bool BPFAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) { @@ -483,9 +483,8 @@ ParseStatus BPFAsmParser::parseImmediate(OperandVector &Operands) { return ParseStatus::Success; } -/// ParseInstruction - Parse an BPF instruction which is in BPF verifier -/// format. -bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, +/// Parse an BPF instruction which is in BPF verifier format. +bool BPFAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { // The first operand could be either register or actually an operator. unsigned RegNo = MatchRegisterName(Name); diff --git a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp index 30bd3dcefa60..d923c96bc008 100644 --- a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp +++ b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp @@ -67,14 +67,14 @@ class CSKYAsmParser : public MCTargetAsmParser { SMLoc getLoc() const { return getParser().getTok().getLoc(); } - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override; bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; ParseStatus parseDirective(AsmToken DirectiveID) override; @@ -656,7 +656,7 @@ bool CSKYAsmParser::generateImmOutOfRangeError( return Error(ErrorLoc, Msg + " [" + 
Twine(Lower) + ", " + Twine(Upper) + "]"); } -bool CSKYAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, +bool CSKYAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, @@ -1485,7 +1485,7 @@ ParseStatus CSKYAsmParser::parseRegList(OperandVector &Operands) { return ParseStatus::Success; } -bool CSKYAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, +bool CSKYAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { // First operand is token for instruction. Operands.push_back(CSKYOperand::createToken(Name, NameLoc)); diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 902ab37bf741..9aa0af3e3a6b 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -553,6 +553,17 @@ def Rbits : DXILOp<30, unary> { let attributes = [Attributes<DXIL1_0, [ReadNone]>]; } +def CBits : DXILOp<31, unary> { + let Doc = "Returns the number of 1 bits in the specified value."; + let LLVMIntrinsic = int_ctpop; + let arguments = [OverloadTy]; + let result = OverloadTy; + let overloads = + [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>]; + let stages = [Stages<DXIL1_0, [all_stages]>]; + let attributes = [Attributes<DXIL1_0, [ReadNone]>]; +} + def FMax : DXILOp<35, binary> { let Doc = "Float maximum. FMax(a,b) = a > b ? 
a : b"; let LLVMIntrinsic = int_maxnum; diff --git a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index 2fb1c484fc8a..62f188957ccc 100644 --- a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -134,7 +134,7 @@ class HexagonAsmParser : public MCTargetAsmParser { OperandVector &InstOperands, uint64_t &ErrorInfo, bool MatchingInlineAsm); void eatToEndOfPacket(); - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override; @@ -180,12 +180,12 @@ public: bool parseExpressionOrOperand(OperandVector &Operands); bool parseExpression(MCExpr const *&Expr); - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override { llvm_unreachable("Unimplemented"); } - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, AsmToken ID, + bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, AsmToken ID, OperandVector &Operands) override; bool ParseDirective(AsmToken DirectiveID) override; @@ -614,7 +614,7 @@ void HexagonAsmParser::eatToEndOfPacket() { InBrackets = false; } -bool HexagonAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, +bool HexagonAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, @@ -1278,7 +1278,7 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) { } } -bool HexagonAsmParser::ParseInstruction(ParseInstructionInfo &Info, +bool HexagonAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, AsmToken ID, OperandVector &Operands) { getLexer().UnLex(ID); diff --git 
a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp index 6ab1375b974e..280f1f3ddbb6 100644 --- a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp +++ b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp @@ -62,14 +62,14 @@ class LanaiAsmParser : public MCTargetAsmParser { bool parsePrePost(StringRef Type, int *OffsetValue); - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool MatchAndEmitInstruction(SMLoc IdLoc, unsigned &Opcode, + bool matchAndEmitInstruction(SMLoc IdLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override; @@ -645,7 +645,7 @@ public: } // end anonymous namespace -bool LanaiAsmParser::MatchAndEmitInstruction(SMLoc IdLoc, unsigned &Opcode, +bool LanaiAsmParser::matchAndEmitInstruction(SMLoc IdLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, @@ -1161,7 +1161,7 @@ static bool MaybePredicatedInst(const OperandVector &Operands) { .Default(false); } -bool LanaiAsmParser::ParseInstruction(ParseInstructionInfo & /*Info*/, +bool LanaiAsmParser::parseInstruction(ParseInstructionInfo & /*Info*/, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { // First operand is token for instruction diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp index b8f1cdfd2cb3..57c42024b4d2 100644 --- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp @@ -47,10 +47,10 @@ class LoongArchAsmParser : public MCTargetAsmParser { ParseStatus tryParseRegister(MCRegister 
&Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override; @@ -65,7 +65,7 @@ class LoongArchAsmParser : public MCTargetAsmParser { const Twine &Msg); /// Helper for processing MC instructions that have been successfully matched - /// by MatchAndEmitInstruction. + /// by matchAndEmitInstruction. bool processInstruction(MCInst &Inst, SMLoc IDLoc, OperandVector &Operands, MCStreamer &Out); @@ -793,7 +793,7 @@ bool LoongArchAsmParser::parseOperand(OperandVector &Operands, return Error(getLoc(), "unknown operand"); } -bool LoongArchAsmParser::ParseInstruction(ParseInstructionInfo &Info, +bool LoongArchAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { // First operand in MCInst is instruction mnemonic. 
@@ -1506,7 +1506,7 @@ bool LoongArchAsmParser::generateImmOutOfRangeError( return Error(ErrorLoc, Msg + " [" + Twine(Lower) + ", " + Twine(Upper) + "]"); } -bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, +bool LoongArchAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, diff --git a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp index 126176133dc0..3a0d9dd316d8 100644 --- a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp +++ b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp @@ -69,9 +69,9 @@ public: bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override; @@ -959,7 +959,7 @@ void M68kAsmParser::eatComma() { } } -bool M68kAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, +bool M68kAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { SMLoc Start = getLexer().getLoc(); Operands.push_back(M68kOperand::createToken(Name, Start, Start)); @@ -1024,7 +1024,7 @@ bool M68kAsmParser::emit(MCInst &Inst, SMLoc const &Loc, return false; } -bool M68kAsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode, +bool M68kAsmParser::matchAndEmitInstruction(SMLoc Loc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, diff --git a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp 
b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp index 2bc1a89ef59c..34ae80669f2c 100644 --- a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp +++ b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp @@ -40,7 +40,7 @@ class MSP430AsmParser : public MCTargetAsmParser { MCAsmParser &Parser; const MCRegisterInfo *MRI; - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override; @@ -49,7 +49,7 @@ class MSP430AsmParser : public MCTargetAsmParser { ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; ParseStatus parseDirective(AsmToken DirectiveID) override; @@ -252,7 +252,7 @@ public: }; } // end anonymous namespace -bool MSP430AsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode, +bool MSP430AsmParser::matchAndEmitInstruction(SMLoc Loc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, @@ -385,7 +385,7 @@ bool MSP430AsmParser::parseJccInstruction(ParseInstructionInfo &Info, return false; } -bool MSP430AsmParser::ParseInstruction(ParseInstructionInfo &Info, +bool MSP430AsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { // Drop .w suffix diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 8ab435c6c6fd..7888c57363ed 100644 --- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -174,7 +174,7 @@ class MipsAsmParser : public MCTargetAsmParser { const OperandVector &Operands) override; unsigned checkTargetMatchPredicate(MCInst &Inst) override; - bool 
MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override; @@ -190,7 +190,7 @@ class MipsAsmParser : public MCTargetAsmParser { bool mnemonicIsValid(StringRef Mnemonic, unsigned VariantID); - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; bool ParseDirective(AsmToken DirectiveID) override; @@ -5992,7 +5992,7 @@ static SMLoc RefineErrorLoc(const SMLoc Loc, const OperandVector &Operands, return Loc; } -bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, +bool MipsAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, @@ -6997,10 +6997,10 @@ bool MipsAsmParser::areEqualRegs(const MCParsedAsmOperand &Op1, return Op1.getReg() == Op2.getReg(); } -bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, +bool MipsAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { MCAsmParser &Parser = getParser(); - LLVM_DEBUG(dbgs() << "ParseInstruction\n"); + LLVM_DEBUG(dbgs() << "parseInstruction\n"); // We have reached first instruction, module directive are now forbidden. 
getTargetStreamer().forbidModuleDirective(); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index c5a40e430886..31a5e937adae 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -6179,6 +6179,7 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, case MVT::v4i16: case MVT::v4i32: case MVT::v4f16: + case MVT::v4bf16: case MVT::v4f32: case MVT::v8f16: // <4 x f16x2> case MVT::v8bf16: // <4 x bf16x2> diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 59ad995b44b0..597a976b076a 100644 --- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -103,32 +103,32 @@ class PPCAsmParser : public MCTargetAsmParser { bool isPPC64() const { return IsPPC64; } - bool MatchRegisterName(MCRegister &RegNo, int64_t &IntVal); + bool matchRegisterName(MCRegister &RegNo, int64_t &IntVal); bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; - const MCExpr *ExtractModifierFromExpr(const MCExpr *E, + const MCExpr *extractModifierFromExpr(const MCExpr *E, PPCMCExpr::VariantKind &Variant); - const MCExpr *FixupVariantKind(const MCExpr *E); - bool ParseExpression(const MCExpr *&EVal); + const MCExpr *fixupVariantKind(const MCExpr *E); + bool parseExpression(const MCExpr *&EVal); - bool ParseOperand(OperandVector &Operands); + bool parseOperand(OperandVector &Operands); - bool ParseDirectiveWord(unsigned Size, AsmToken ID); - bool ParseDirectiveTC(unsigned Size, AsmToken ID); - bool ParseDirectiveMachine(SMLoc L); - bool ParseDirectiveAbiVersion(SMLoc L); - bool ParseDirectiveLocalEntry(SMLoc L); - bool ParseGNUAttribute(SMLoc L); + bool parseDirectiveWord(unsigned Size, AsmToken ID); + bool parseDirectiveTC(unsigned Size, AsmToken 
ID); + bool parseDirectiveMachine(SMLoc L); + bool parseDirectiveAbiVersion(SMLoc L); + bool parseDirectiveLocalEntry(SMLoc L); + bool parseGNUAttribute(SMLoc L); - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override; - void ProcessInstruction(MCInst &Inst, const OperandVector &Ops); + void processInstruction(MCInst &Inst, const OperandVector &Ops); /// @name Auto-generated Match Functions /// { @@ -150,7 +150,7 @@ public: setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; bool ParseDirective(AsmToken DirectiveID) override; @@ -818,7 +818,7 @@ addNegOperand(MCInst &Inst, MCOperand &Op, MCContext &Ctx) { Inst.addOperand(MCOperand::createExpr(MCUnaryExpr::createMinus(Expr, Ctx))); } -void PPCAsmParser::ProcessInstruction(MCInst &Inst, +void PPCAsmParser::processInstruction(MCInst &Inst, const OperandVector &Operands) { int Opcode = Inst.getOpcode(); switch (Opcode) { @@ -1252,7 +1252,7 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, static std::string PPCMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID = 0); -bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, +bool PPCAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) { @@ -1261,7 +1261,7 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) { case Match_Success: // Post-process instructions (typically extended mnemonics) - ProcessInstruction(Inst, Operands); + processInstruction(Inst, Operands); 
Inst.setLoc(IDLoc); Out.emitInstruction(Inst, getSTI()); return false; @@ -1291,7 +1291,7 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, llvm_unreachable("Implement any new match types added!"); } -bool PPCAsmParser::MatchRegisterName(MCRegister &RegNo, int64_t &IntVal) { +bool PPCAsmParser::matchRegisterName(MCRegister &RegNo, int64_t &IntVal) { if (getParser().getTok().is(AsmToken::Percent)) getParser().Lex(); // Eat the '%'. @@ -1364,7 +1364,7 @@ ParseStatus PPCAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, EndLoc = Tok.getEndLoc(); Reg = PPC::NoRegister; int64_t IntVal; - if (MatchRegisterName(Reg, IntVal)) + if (matchRegisterName(Reg, IntVal)) return ParseStatus::NoMatch; return ParseStatus::Success; } @@ -1375,9 +1375,9 @@ ParseStatus PPCAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, /// variant, return the corresponding PPCMCExpr::VariantKind, /// and a modified expression using the default symbol variant. /// Otherwise, return NULL. 
-const MCExpr *PPCAsmParser:: -ExtractModifierFromExpr(const MCExpr *E, - PPCMCExpr::VariantKind &Variant) { +const MCExpr * +PPCAsmParser::extractModifierFromExpr(const MCExpr *E, + PPCMCExpr::VariantKind &Variant) { MCContext &Context = getParser().getContext(); Variant = PPCMCExpr::VK_PPC_None; @@ -1426,7 +1426,7 @@ ExtractModifierFromExpr(const MCExpr *E, case MCExpr::Unary: { const MCUnaryExpr *UE = cast<MCUnaryExpr>(E); - const MCExpr *Sub = ExtractModifierFromExpr(UE->getSubExpr(), Variant); + const MCExpr *Sub = extractModifierFromExpr(UE->getSubExpr(), Variant); if (!Sub) return nullptr; return MCUnaryExpr::create(UE->getOpcode(), Sub, Context); @@ -1435,8 +1435,8 @@ ExtractModifierFromExpr(const MCExpr *E, case MCExpr::Binary: { const MCBinaryExpr *BE = cast<MCBinaryExpr>(E); PPCMCExpr::VariantKind LHSVariant, RHSVariant; - const MCExpr *LHS = ExtractModifierFromExpr(BE->getLHS(), LHSVariant); - const MCExpr *RHS = ExtractModifierFromExpr(BE->getRHS(), RHSVariant); + const MCExpr *LHS = extractModifierFromExpr(BE->getLHS(), LHSVariant); + const MCExpr *RHS = extractModifierFromExpr(BE->getRHS(), RHSVariant); if (!LHS && !RHS) return nullptr; @@ -1464,8 +1464,7 @@ ExtractModifierFromExpr(const MCExpr *E, /// them by VK_PPC_TLSGD/VK_PPC_TLSLD. This is necessary to avoid having /// _GLOBAL_OFFSET_TABLE_ created via ELFObjectWriter::RelocNeedsGOT. /// FIXME: This is a hack. 
-const MCExpr *PPCAsmParser:: -FixupVariantKind(const MCExpr *E) { +const MCExpr *PPCAsmParser::fixupVariantKind(const MCExpr *E) { MCContext &Context = getParser().getContext(); switch (E->getKind()) { @@ -1492,7 +1491,7 @@ FixupVariantKind(const MCExpr *E) { case MCExpr::Unary: { const MCUnaryExpr *UE = cast<MCUnaryExpr>(E); - const MCExpr *Sub = FixupVariantKind(UE->getSubExpr()); + const MCExpr *Sub = fixupVariantKind(UE->getSubExpr()); if (Sub == UE->getSubExpr()) return E; return MCUnaryExpr::create(UE->getOpcode(), Sub, Context); @@ -1500,8 +1499,8 @@ FixupVariantKind(const MCExpr *E) { case MCExpr::Binary: { const MCBinaryExpr *BE = cast<MCBinaryExpr>(E); - const MCExpr *LHS = FixupVariantKind(BE->getLHS()); - const MCExpr *RHS = FixupVariantKind(BE->getRHS()); + const MCExpr *LHS = fixupVariantKind(BE->getLHS()); + const MCExpr *RHS = fixupVariantKind(BE->getRHS()); if (LHS == BE->getLHS() && RHS == BE->getRHS()) return E; return MCBinaryExpr::create(BE->getOpcode(), LHS, RHS, Context); @@ -1511,29 +1510,27 @@ FixupVariantKind(const MCExpr *E) { llvm_unreachable("Invalid expression kind!"); } -/// ParseExpression. This differs from the default "parseExpression" in that -/// it handles modifiers. -bool PPCAsmParser:: -ParseExpression(const MCExpr *&EVal) { +/// This differs from the default "parseExpression" in that it handles +/// modifiers. +bool PPCAsmParser::parseExpression(const MCExpr *&EVal) { // (ELF Platforms) // Handle \code @l/@ha \endcode if (getParser().parseExpression(EVal)) return true; - EVal = FixupVariantKind(EVal); + EVal = fixupVariantKind(EVal); PPCMCExpr::VariantKind Variant; - const MCExpr *E = ExtractModifierFromExpr(EVal, Variant); + const MCExpr *E = extractModifierFromExpr(EVal, Variant); if (E) EVal = PPCMCExpr::create(Variant, E, getParser().getContext()); return false; } -/// ParseOperand /// This handles registers in the form 'NN', '%rNN' for ELF platforms and /// rNN for MachO. 
-bool PPCAsmParser::ParseOperand(OperandVector &Operands) { +bool PPCAsmParser::parseOperand(OperandVector &Operands) { MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); @@ -1546,7 +1543,7 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) { case AsmToken::Percent: { MCRegister RegNo; int64_t IntVal; - if (MatchRegisterName(RegNo, IntVal)) + if (matchRegisterName(RegNo, IntVal)) return Error(S, "invalid register name"); Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64())); @@ -1561,7 +1558,7 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) { case AsmToken::Dollar: case AsmToken::Exclaim: case AsmToken::Tilde: - if (!ParseExpression(EVal)) + if (!parseExpression(EVal)) break; // Fall-through [[fallthrough]]; @@ -1589,7 +1586,7 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) { if (TlsCall && parseOptionalToken(AsmToken::LParen)) { const MCExpr *TLSSym; const SMLoc S2 = Parser.getTok().getLoc(); - if (ParseExpression(TLSSym)) + if (parseExpression(TLSSym)) return Error(S2, "invalid TLS call expression"); E = Parser.getTok().getLoc(); if (parseToken(AsmToken::RParen, "expected ')'")) @@ -1631,7 +1628,7 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) { switch (getLexer().getKind()) { case AsmToken::Percent: { MCRegister RegNo; - if (MatchRegisterName(RegNo, IntVal)) + if (matchRegisterName(RegNo, IntVal)) return Error(S, "invalid register name"); break; } @@ -1655,7 +1652,7 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) { } /// Parse an instruction mnemonic followed by its operands. -bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, +bool PPCAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { // The first operand is the token for the instruction name. 
// If the next character is a '+' or '-', we need to add it to the @@ -1695,11 +1692,11 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, return false; // Parse the first operand - if (ParseOperand(Operands)) + if (parseOperand(Operands)) return true; while (!parseOptionalToken(AsmToken::EndOfStatement)) { - if (parseToken(AsmToken::Comma) || ParseOperand(Operands)) + if (parseToken(AsmToken::Comma) || parseOperand(Operands)) return true; } @@ -1731,31 +1728,30 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, return false; } -/// ParseDirective parses the PPC specific directives +/// Parses the PPC specific directives bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getIdentifier(); if (IDVal == ".word") - ParseDirectiveWord(2, DirectiveID); + parseDirectiveWord(2, DirectiveID); else if (IDVal == ".llong") - ParseDirectiveWord(8, DirectiveID); + parseDirectiveWord(8, DirectiveID); else if (IDVal == ".tc") - ParseDirectiveTC(isPPC64() ? 8 : 4, DirectiveID); + parseDirectiveTC(isPPC64() ? 
8 : 4, DirectiveID); else if (IDVal == ".machine") - ParseDirectiveMachine(DirectiveID.getLoc()); + parseDirectiveMachine(DirectiveID.getLoc()); else if (IDVal == ".abiversion") - ParseDirectiveAbiVersion(DirectiveID.getLoc()); + parseDirectiveAbiVersion(DirectiveID.getLoc()); else if (IDVal == ".localentry") - ParseDirectiveLocalEntry(DirectiveID.getLoc()); + parseDirectiveLocalEntry(DirectiveID.getLoc()); else if (IDVal.starts_with(".gnu_attribute")) - ParseGNUAttribute(DirectiveID.getLoc()); + parseGNUAttribute(DirectiveID.getLoc()); else return true; return false; } -/// ParseDirectiveWord /// ::= .word [ expression (, expression)* ] -bool PPCAsmParser::ParseDirectiveWord(unsigned Size, AsmToken ID) { +bool PPCAsmParser::parseDirectiveWord(unsigned Size, AsmToken ID) { auto parseOp = [&]() -> bool { const MCExpr *Value; SMLoc ExprLoc = getParser().getTok().getLoc(); @@ -1778,9 +1774,8 @@ bool PPCAsmParser::ParseDirectiveWord(unsigned Size, AsmToken ID) { return false; } -/// ParseDirectiveTC /// ::= .tc [ symbol (, expression)* ] -bool PPCAsmParser::ParseDirectiveTC(unsigned Size, AsmToken ID) { +bool PPCAsmParser::parseDirectiveTC(unsigned Size, AsmToken ID) { MCAsmParser &Parser = getParser(); // Skip TC symbol, which is only used with XCOFF. while (getLexer().isNot(AsmToken::EndOfStatement) @@ -1793,12 +1788,12 @@ bool PPCAsmParser::ParseDirectiveTC(unsigned Size, AsmToken ID) { getParser().getStreamer().emitValueToAlignment(Align(Size)); // Emit expressions. - return ParseDirectiveWord(Size, ID); + return parseDirectiveWord(Size, ID); } -/// ParseDirectiveMachine (ELF platforms) +/// ELF platforms. 
/// ::= .machine [ cpu | "push" | "pop" ] -bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) { +bool PPCAsmParser::parseDirectiveMachine(SMLoc L) { MCAsmParser &Parser = getParser(); if (Parser.getTok().isNot(AsmToken::Identifier) && Parser.getTok().isNot(AsmToken::String)) @@ -1823,9 +1818,8 @@ bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) { return false; } -/// ParseDirectiveAbiVersion /// ::= .abiversion constant-expression -bool PPCAsmParser::ParseDirectiveAbiVersion(SMLoc L) { +bool PPCAsmParser::parseDirectiveAbiVersion(SMLoc L) { int64_t AbiVersion; if (check(getParser().parseAbsoluteExpression(AbiVersion), L, "expected constant expression") || @@ -1840,9 +1834,8 @@ bool PPCAsmParser::ParseDirectiveAbiVersion(SMLoc L) { return false; } -/// ParseDirectiveLocalEntry /// ::= .localentry symbol, expression -bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) { +bool PPCAsmParser::parseDirectiveLocalEntry(SMLoc L) { StringRef Name; if (getParser().parseIdentifier(Name)) return Error(L, "expected identifier in '.localentry' directive"); @@ -1863,7 +1856,7 @@ bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) { return false; } -bool PPCAsmParser::ParseGNUAttribute(SMLoc L) { +bool PPCAsmParser::parseGNUAttribute(SMLoc L) { int64_t Tag; int64_t IntegerValue; if (!getParser().parseGNUAttribute(L, Tag, IntegerValue)) diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 6eb205810761..9600293d3da7 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -104,7 +104,7 @@ class RISCVAsmParser : public MCTargetAsmParser { bool generateImmOutOfRangeError(SMLoc ErrorLoc, int64_t Lower, int64_t Upper, const Twine &Msg); - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) 
override; @@ -114,7 +114,7 @@ class RISCVAsmParser : public MCTargetAsmParser { ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; ParseStatus parseDirective(AsmToken DirectiveID) override; @@ -182,7 +182,7 @@ class RISCVAsmParser : public MCTargetAsmParser { bool validateInstruction(MCInst &Inst, OperandVector &Operands); /// Helper for processing MC instructions that have been successfully matched - /// by MatchAndEmitInstruction. Modifications to the emitted instructions, + /// by matchAndEmitInstruction. Modifications to the emitted instructions, /// like the expansion of pseudo instructions (e.g., "li"), can be performed /// in this method. bool processInstruction(MCInst &Inst, SMLoc IDLoc, OperandVector &Operands, @@ -1376,7 +1376,7 @@ bool RISCVAsmParser::generateImmOutOfRangeError( return generateImmOutOfRangeError(ErrorLoc, Lower, Upper, Msg); } -bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, +bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, @@ -2732,7 +2732,7 @@ bool RISCVAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { return true; } -bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info, +bool RISCVAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { // Ensure that if the instruction occurs when relaxation is enabled, @@ -3186,12 +3186,12 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) { ParseInstructionInfo Info; SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> Operands; - if (ParseInstruction(Info, FormatName, L, Operands)) + if (parseInstruction(Info, FormatName, L, Operands)) return true; unsigned Opcode; uint64_t 
ErrorInfo; - return MatchAndEmitInstruction(L, Opcode, Operands, Parser.getStreamer(), + return matchAndEmitInstruction(L, Opcode, Operands, Parser.getStreamer(), ErrorInfo, /*MatchingInlineAsm=*/false); } diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index c204683f4e79..055193bcc2c8 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -287,34 +287,48 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) auto &LoadActions = getActionDefinitionsBuilder(G_LOAD); auto &StoreActions = getActionDefinitionsBuilder(G_STORE); + auto &ExtLoadActions = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD}); - LoadActions - .legalForTypesWithMemDesc({{s32, p0, s8, 8}, - {s32, p0, s16, 16}, - {s32, p0, s32, 32}, - {p0, p0, sXLen, XLen}}); - StoreActions - .legalForTypesWithMemDesc({{s32, p0, s8, 8}, - {s32, p0, s16, 16}, - {s32, p0, s32, 32}, - {p0, p0, sXLen, XLen}}); - auto &ExtLoadActions = - getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD}) - .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 16}}); + // Return the alignment needed for scalar memory ops. If unaligned scalar mem + // is supported, we only require byte alignment. Otherwise, we need the memory + // op to be natively aligned. + auto getScalarMemAlign = [&ST](unsigned Size) { + return ST.enableUnalignedScalarMem() ? 
8 : Size; + }; + + LoadActions.legalForTypesWithMemDesc( + {{s32, p0, s8, getScalarMemAlign(8)}, + {s32, p0, s16, getScalarMemAlign(16)}, + {s32, p0, s32, getScalarMemAlign(32)}, + {p0, p0, sXLen, getScalarMemAlign(XLen)}}); + StoreActions.legalForTypesWithMemDesc( + {{s32, p0, s8, getScalarMemAlign(8)}, + {s32, p0, s16, getScalarMemAlign(16)}, + {s32, p0, s32, getScalarMemAlign(32)}, + {p0, p0, sXLen, getScalarMemAlign(XLen)}}); + ExtLoadActions.legalForTypesWithMemDesc( + {{s32, p0, s8, getScalarMemAlign(8)}, + {s32, p0, s16, getScalarMemAlign(16)}}); if (XLen == 64) { - LoadActions.legalForTypesWithMemDesc({{s64, p0, s8, 8}, - {s64, p0, s16, 16}, - {s64, p0, s32, 32}, - {s64, p0, s64, 64}}); - StoreActions.legalForTypesWithMemDesc({{s64, p0, s8, 8}, - {s64, p0, s16, 16}, - {s64, p0, s32, 32}, - {s64, p0, s64, 64}}); + LoadActions.legalForTypesWithMemDesc( + {{s64, p0, s8, getScalarMemAlign(8)}, + {s64, p0, s16, getScalarMemAlign(16)}, + {s64, p0, s32, getScalarMemAlign(32)}, + {s64, p0, s64, getScalarMemAlign(64)}}); + StoreActions.legalForTypesWithMemDesc( + {{s64, p0, s8, getScalarMemAlign(8)}, + {s64, p0, s16, getScalarMemAlign(16)}, + {s64, p0, s32, getScalarMemAlign(32)}, + {s64, p0, s64, getScalarMemAlign(64)}}); ExtLoadActions.legalForTypesWithMemDesc( - {{s64, p0, s8, 8}, {s64, p0, s16, 16}, {s64, p0, s32, 32}}); + {{s64, p0, s8, getScalarMemAlign(8)}, + {s64, p0, s16, getScalarMemAlign(16)}, + {s64, p0, s32, getScalarMemAlign(32)}}); } else if (ST.hasStdExtD()) { - LoadActions.legalForTypesWithMemDesc({{s64, p0, s64, 64}}); - StoreActions.legalForTypesWithMemDesc({{s64, p0, s64, 64}}); + LoadActions.legalForTypesWithMemDesc( + {{s64, p0, s64, getScalarMemAlign(64)}}); + StoreActions.legalForTypesWithMemDesc( + {{s64, p0, s64, getScalarMemAlign(64)}}); } // Vector loads/stores. @@ -541,7 +555,9 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) // FIXME: We can do custom inline expansion like SelectionDAG. // FIXME: Legal with Zfa. 
- getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR}) + getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT, + G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, + G_INTRINSIC_ROUNDEVEN}) .libcallFor({s32, s64}); getActionDefinitionsBuilder(G_VASTART).customFor({p0}); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index d5b3cccda02d..189fb741f34c 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -941,39 +941,41 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, }; // TODO: support more ops. - static const unsigned ZvfhminPromoteOps[] = { - ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB, - ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT, - ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, - ISD::FRINT, ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, - ISD::FMAXIMUM, ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, - ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA}; + static const unsigned ZvfhminZvfbfminPromoteOps[] = { + ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB, + ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT, + ISD::FCEIL, ISD::FTRUNC, ISD::FFLOOR, ISD::FROUND, + ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT, ISD::IS_FPCLASS, + ISD::SETCC, ISD::FMAXIMUM, ISD::FMINIMUM, ISD::STRICT_FADD, + ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT, + ISD::STRICT_FMA}; // TODO: support more vp ops. 
- static const unsigned ZvfhminPromoteVPOps[] = {ISD::VP_FADD, - ISD::VP_FSUB, - ISD::VP_FMUL, - ISD::VP_FDIV, - ISD::VP_FMA, - ISD::VP_REDUCE_FADD, - ISD::VP_REDUCE_SEQ_FADD, - ISD::VP_REDUCE_FMIN, - ISD::VP_REDUCE_FMAX, - ISD::VP_SQRT, - ISD::VP_FMINNUM, - ISD::VP_FMAXNUM, - ISD::VP_FCEIL, - ISD::VP_FFLOOR, - ISD::VP_FROUND, - ISD::VP_FROUNDEVEN, - ISD::VP_FROUNDTOZERO, - ISD::VP_FRINT, - ISD::VP_FNEARBYINT, - ISD::VP_SETCC, - ISD::VP_FMINIMUM, - ISD::VP_FMAXIMUM, - ISD::VP_REDUCE_FMINIMUM, - ISD::VP_REDUCE_FMAXIMUM}; + static const unsigned ZvfhminZvfbfminPromoteVPOps[] = { + ISD::VP_FADD, + ISD::VP_FSUB, + ISD::VP_FMUL, + ISD::VP_FDIV, + ISD::VP_FMA, + ISD::VP_REDUCE_FADD, + ISD::VP_REDUCE_SEQ_FADD, + ISD::VP_REDUCE_FMIN, + ISD::VP_REDUCE_FMAX, + ISD::VP_SQRT, + ISD::VP_FMINNUM, + ISD::VP_FMAXNUM, + ISD::VP_FCEIL, + ISD::VP_FFLOOR, + ISD::VP_FROUND, + ISD::VP_FROUNDEVEN, + ISD::VP_FROUNDTOZERO, + ISD::VP_FRINT, + ISD::VP_FNEARBYINT, + ISD::VP_SETCC, + ISD::VP_FMINIMUM, + ISD::VP_FMAXIMUM, + ISD::VP_REDUCE_FMINIMUM, + ISD::VP_REDUCE_FMAXIMUM}; // Sets common operation actions on RVV floating-point vector types. const auto SetCommonVFPActions = [&](MVT VT) { @@ -1061,6 +1063,45 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, } }; + // Sets common actions for f16 and bf16 for when there's only + // zvfhmin/zvfbfmin and we need to promote to f32 for most operations. 
+ const auto SetCommonPromoteToF32Actions = [&](MVT VT) { + setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); + setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT, + Custom); + setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom); + setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT, + Custom); + setOperationAction(ISD::SELECT_CC, VT, Expand); + setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::VP_SINT_TO_FP, + ISD::VP_UINT_TO_FP}, + VT, Custom); + setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, + ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_INTERLEAVE, + ISD::VECTOR_DEINTERLEAVE}, + VT, Custom); + MVT EltVT = VT.getVectorElementType(); + if (isTypeLegal(EltVT)) + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + else + setOperationAction(ISD::SPLAT_VECTOR, EltVT, Custom); + setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); + + setOperationAction(ISD::FNEG, VT, Expand); + setOperationAction(ISD::FABS, VT, Expand); + setOperationAction(ISD::FCOPYSIGN, VT, Expand); + + // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal. 
+ if (getLMUL(VT) == RISCVII::VLMUL::LMUL_8) { + setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom); + setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom); + } else { + MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); + setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT); + setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT); + } + }; + if (Subtarget.hasVInstructionsF16()) { for (MVT VT : F16VecVTs) { if (!isTypeLegal(VT)) @@ -1071,74 +1112,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, for (MVT VT : F16VecVTs) { if (!isTypeLegal(VT)) continue; - setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); - setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT, - Custom); - setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom); - setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT, - Custom); - setOperationAction(ISD::SELECT_CC, VT, Expand); - setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, - ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, - VT, Custom); - setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, - ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_INTERLEAVE, - ISD::VECTOR_DEINTERLEAVE}, - VT, Custom); - if (Subtarget.hasStdExtZfhmin()) - setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); - else - setOperationAction(ISD::SPLAT_VECTOR, MVT::f16, Custom); - // load/store - setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); - - setOperationAction(ISD::FNEG, VT, Expand); - setOperationAction(ISD::FABS, VT, Expand); - setOperationAction(ISD::FCOPYSIGN, VT, Expand); - - // Custom split nxv32f16 since nxv32f32 if not legal. - if (VT == MVT::nxv32f16) { - setOperationAction(ZvfhminPromoteOps, VT, Custom); - setOperationAction(ZvfhminPromoteVPOps, VT, Custom); - continue; - } - // Add more promote ops. 
- MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); - setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT); - setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT); + SetCommonPromoteToF32Actions(VT); } } - // TODO: Could we merge some code with zvfhmin? if (Subtarget.hasVInstructionsBF16Minimal()) { for (MVT VT : BF16VecVTs) { if (!isTypeLegal(VT)) continue; - setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); - setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT, - Custom); - setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom); - setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT, - Custom); - setOperationAction(ISD::SELECT_CC, VT, Expand); - setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, - ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, - VT, Custom); - setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, - ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_INTERLEAVE, - ISD::VECTOR_DEINTERLEAVE}, - VT, Custom); - if (Subtarget.hasStdExtZfbfmin()) - setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); - else - setOperationAction(ISD::SPLAT_VECTOR, MVT::bf16, Custom); - setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); - - setOperationAction(ISD::FNEG, VT, Expand); - setOperationAction(ISD::FABS, VT, Expand); - setOperationAction(ISD::FCOPYSIGN, VT, Expand); - - // TODO: Promote to fp32. + SetCommonPromoteToF32Actions(VT); } } @@ -1374,8 +1356,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // TODO: could split the f16 vector into two vectors and do promotion. 
if (!isTypeLegal(F32VecVT)) continue; - setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT); - setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT); + setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT); + setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT); continue; } @@ -6332,6 +6314,17 @@ static bool hasMaskOp(unsigned Opcode) { return false; } +static bool isPromotedOpNeedingSplit(SDValue Op, + const RISCVSubtarget &Subtarget) { + if (Op.getValueType() == MVT::nxv32f16 && + (Subtarget.hasVInstructionsF16Minimal() && + !Subtarget.hasVInstructionsF16())) + return true; + if (Op.getValueType() == MVT::nxv32bf16) + return true; + return false; +} + static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) { auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType()); SDLoc DL(Op); @@ -6669,9 +6662,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, } case ISD::FMAXIMUM: case ISD::FMINIMUM: - if (Op.getValueType() == MVT::nxv32f16 && - (Subtarget.hasVInstructionsF16Minimal() && - !Subtarget.hasVInstructionsF16())) + if (isPromotedOpNeedingSplit(Op, Subtarget)) return SplitVectorOp(Op, DAG); return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget); case ISD::FP_EXTEND: @@ -6687,8 +6678,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, (Subtarget.hasVInstructionsF16Minimal() && !Subtarget.hasVInstructionsF16())) || Op.getValueType().getScalarType() == MVT::bf16)) { - if (Op.getValueType() == MVT::nxv32f16 || - Op.getValueType() == MVT::nxv32bf16) + if (isPromotedOpNeedingSplit(Op, Subtarget)) return SplitVectorOp(Op, DAG); // int -> f32 SDLoc DL(Op); @@ -6708,8 +6698,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, (Subtarget.hasVInstructionsF16Minimal() && !Subtarget.hasVInstructionsF16())) || Op1.getValueType().getScalarType() == MVT::bf16)) { - if (Op1.getValueType() == MVT::nxv32f16 || - Op1.getValueType() == MVT::nxv32bf16) + if (isPromotedOpNeedingSplit(Op1, Subtarget)) return 
SplitVectorOp(Op, DAG); // [b]f16 -> f32 SDLoc DL(Op); @@ -6941,6 +6930,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::FRINT: case ISD::FROUND: case ISD::FROUNDEVEN: + if (isPromotedOpNeedingSplit(Op, Subtarget)) + return SplitVectorOp(Op, DAG); return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); case ISD::LRINT: case ISD::LLRINT: @@ -6997,9 +6988,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::VP_REDUCE_FMAX: case ISD::VP_REDUCE_FMINIMUM: case ISD::VP_REDUCE_FMAXIMUM: - if (Op.getOperand(1).getValueType() == MVT::nxv32f16 && - (Subtarget.hasVInstructionsF16Minimal() && - !Subtarget.hasVInstructionsF16())) + if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget)) return SplitVectorReductionOp(Op, DAG); return lowerVPREDUCE(Op, DAG); case ISD::VP_REDUCE_AND: @@ -7246,9 +7235,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return DAG.getSetCC(DL, VT, RHS, LHS, CCVal); } - if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 && - (Subtarget.hasVInstructionsF16Minimal() && - !Subtarget.hasVInstructionsF16())) + if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget)) return SplitVectorOp(Op, DAG); return lowerFixedLengthVectorSetccToRVV(Op, DAG); @@ -7290,9 +7277,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::FMA: case ISD::FMINNUM: case ISD::FMAXNUM: - if (Op.getValueType() == MVT::nxv32f16 && - (Subtarget.hasVInstructionsF16Minimal() && - !Subtarget.hasVInstructionsF16())) + if (isPromotedOpNeedingSplit(Op, Subtarget)) return SplitVectorOp(Op, DAG); [[fallthrough]]; case ISD::AVGFLOORS: @@ -7340,9 +7325,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::FCOPYSIGN: if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16) return lowerFCOPYSIGN(Op, DAG, Subtarget); - if (Op.getValueType() == MVT::nxv32f16 && - (Subtarget.hasVInstructionsF16Minimal() && - !Subtarget.hasVInstructionsF16())) + if (isPromotedOpNeedingSplit(Op, Subtarget)) 
return SplitVectorOp(Op, DAG); return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG); case ISD::STRICT_FADD: @@ -7351,9 +7334,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::STRICT_FDIV: case ISD::STRICT_FSQRT: case ISD::STRICT_FMA: - if (Op.getValueType() == MVT::nxv32f16 && - (Subtarget.hasVInstructionsF16Minimal() && - !Subtarget.hasVInstructionsF16())) + if (isPromotedOpNeedingSplit(Op, Subtarget)) return SplitStrictFPVectorOp(Op, DAG); return lowerToScalableOp(Op, DAG); case ISD::STRICT_FSETCC: @@ -7410,9 +7391,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::VP_FMINNUM: case ISD::VP_FMAXNUM: case ISD::VP_FCOPYSIGN: - if (Op.getValueType() == MVT::nxv32f16 && - (Subtarget.hasVInstructionsF16Minimal() && - !Subtarget.hasVInstructionsF16())) + if (isPromotedOpNeedingSplit(Op, Subtarget)) return SplitVPOp(Op, DAG); [[fallthrough]]; case ISD::VP_SRA: @@ -7438,8 +7417,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, (Subtarget.hasVInstructionsF16Minimal() && !Subtarget.hasVInstructionsF16())) || Op.getValueType().getScalarType() == MVT::bf16)) { - if (Op.getValueType() == MVT::nxv32f16 || - Op.getValueType() == MVT::nxv32bf16) + if (isPromotedOpNeedingSplit(Op, Subtarget)) return SplitVectorOp(Op, DAG); // int -> f32 SDLoc DL(Op); @@ -7459,8 +7437,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, (Subtarget.hasVInstructionsF16Minimal() && !Subtarget.hasVInstructionsF16())) || Op1.getValueType().getScalarType() == MVT::bf16)) { - if (Op1.getValueType() == MVT::nxv32f16 || - Op1.getValueType() == MVT::nxv32bf16) + if (isPromotedOpNeedingSplit(Op1, Subtarget)) return SplitVectorOp(Op, DAG); // [b]f16 -> f32 SDLoc DL(Op); @@ -7473,9 +7450,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, } return lowerVPFPIntConvOp(Op, DAG); case ISD::VP_SETCC: - if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 && - (Subtarget.hasVInstructionsF16Minimal() && - !Subtarget.hasVInstructionsF16())) + if 
(isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget)) return SplitVPOp(Op, DAG); if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1) return lowerVPSetCCMaskOp(Op, DAG); @@ -7510,16 +7485,12 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::VP_FROUND: case ISD::VP_FROUNDEVEN: case ISD::VP_FROUNDTOZERO: - if (Op.getValueType() == MVT::nxv32f16 && - (Subtarget.hasVInstructionsF16Minimal() && - !Subtarget.hasVInstructionsF16())) + if (isPromotedOpNeedingSplit(Op, Subtarget)) return SplitVPOp(Op, DAG); return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); case ISD::VP_FMAXIMUM: case ISD::VP_FMINIMUM: - if (Op.getValueType() == MVT::nxv32f16 && - (Subtarget.hasVInstructionsF16Minimal() && - !Subtarget.hasVInstructionsF16())) + if (isPromotedOpNeedingSplit(Op, Subtarget)) return SplitVPOp(Op, DAG); return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget); case ISD::EXPERIMENTAL_VP_SPLICE: @@ -10804,7 +10775,8 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op, MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0)); SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1); - return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2); + return DAG.getSetCC(DL, VecVT, Op2, + DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE); } MVT ContainerVT = VecVT; diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index 553d86efa3df..ca3e47a4b78f 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -496,7 +496,7 @@ Register SPIRVGlobalRegistry::getOrCreateIntCompositeOrNull( assignSPIRVTypeToVReg(SpvType, SpvVecConst, *CurMF); DT.add(CA, CurMF, SpvVecConst); if (EmitIR) { - MIRBuilder.buildSplatVector(SpvVecConst, SpvScalConst); + MIRBuilder.buildSplatBuildVector(SpvVecConst, SpvScalConst); } else { if (Val) { auto 
MIB = MIRBuilder.buildInstr(SPIRV::OpConstantComposite) diff --git a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index c1200df5d44d..c7a0bebea969 100644 --- a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -70,14 +70,14 @@ class SparcAsmParser : public MCTargetAsmParser { /// } // public interface of the MCTargetAsmParser. - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override; bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; ParseStatus parseDirective(AsmToken DirectiveID) override; @@ -789,7 +789,7 @@ bool SparcAsmParser::expandSETX(MCInst &Inst, SMLoc IDLoc, return false; } -bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, +bool SparcAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, @@ -871,14 +871,14 @@ ParseStatus SparcAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, return ParseStatus::NoMatch; } -bool SparcAsmParser::ParseInstruction(ParseInstructionInfo &Info, +bool SparcAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { // Validate and reject unavailable mnemonics early before // running any operand parsing. 
// This is needed because some operands (mainly memory ones) // differ between V8 and V9 ISA and so any operand parsing errors - // will cause IAS to bail out before it reaches MatchAndEmitInstruction + // will cause IAS to bail out before it reaches matchAndEmitInstruction // (where the instruction as a whole, including the mnemonic, is validated // once again just before emission). // As a nice side effect this also allows us to reject unknown diff --git a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index 7c3898ac6731..5b26ba08dbdb 100644 --- a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -499,9 +499,9 @@ public: bool RequirePercent, bool RestoreOnFailure); ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override; @@ -1401,7 +1401,7 @@ ParseStatus SystemZAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, return ParseStatus::Success; } -bool SystemZAsmParser::ParseInstruction(ParseInstructionInfo &Info, +bool SystemZAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { @@ -1526,7 +1526,7 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands, return false; } -bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, +bool SystemZAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, diff --git 
a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp index 691fe8fe3aa4..5073894cc7fb 100644 --- a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp +++ b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp @@ -51,7 +51,7 @@ class VEAsmParser : public MCTargetAsmParser { /// } // public interface of the MCTargetAsmParser. - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override; @@ -59,7 +59,7 @@ class VEAsmParser : public MCTargetAsmParser { int parseRegisterName(MCRegister (*matchFn)(StringRef)); ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; ParseStatus parseDirective(AsmToken DirectiveID) override; @@ -760,7 +760,7 @@ public: } // end anonymous namespace -bool VEAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, +bool VEAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) { @@ -965,7 +965,7 @@ static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID); -bool VEAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, +bool VEAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { // If the target architecture uses MnemonicAlias, call it here to parse // operands correctly. 
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp index 4fef5fa0ef22..8e8d08f77563 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp @@ -562,7 +562,7 @@ public: } } - bool ParseInstruction(ParseInstructionInfo & /*Info*/, StringRef Name, + bool parseInstruction(ParseInstructionInfo & /*Info*/, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override { // Note: Name does NOT point into the sourcecode, but to a local, so // use NameLoc instead. @@ -1127,7 +1127,7 @@ public: } } - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/, + bool matchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override { diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp index 215722204ba4..4c29b59b3302 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp @@ -110,6 +110,20 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, uint64_t Address, // Print any added annotation. printAnnotation(OS, Annot); + auto PrintBranchAnnotation = [&](const MCOperand &Op, + SmallSet<uint64_t, 8> &Printed) { + uint64_t Depth = Op.getImm(); + if (!Printed.insert(Depth).second) + return; + if (Depth >= ControlFlowStack.size()) { + printAnnotation(OS, "Invalid depth argument!"); + } else { + const auto &Pair = ControlFlowStack.rbegin()[Depth]; + printAnnotation(OS, utostr(Depth) + ": " + (Pair.second ? "up" : "down") + + " to label" + utostr(Pair.first)); + } + }; + if (CommentStream) { // Observe any effects on the control flow stack, for use in annotating // control flow label references. 
@@ -136,6 +150,23 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, uint64_t Address, EHInstStack.push_back(TRY); return; + case WebAssembly::TRY_TABLE: + case WebAssembly::TRY_TABLE_S: { + SmallSet<uint64_t, 8> Printed; + unsigned OpIdx = 1; + const MCOperand &Op = MI->getOperand(OpIdx++); + unsigned NumCatches = Op.getImm(); + for (unsigned I = 0; I < NumCatches; I++) { + int64_t CatchOpcode = MI->getOperand(OpIdx++).getImm(); + if (CatchOpcode == wasm::WASM_OPCODE_CATCH || + CatchOpcode == wasm::WASM_OPCODE_CATCH_REF) + OpIdx++; // Skip tag + PrintBranchAnnotation(MI->getOperand(OpIdx++), Printed); + } + ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false)); + return; + } + case WebAssembly::END_LOOP: case WebAssembly::END_LOOP_S: if (ControlFlowStack.empty()) { @@ -147,6 +178,8 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, uint64_t Address, case WebAssembly::END_BLOCK: case WebAssembly::END_BLOCK_S: + case WebAssembly::END_TRY_TABLE: + case WebAssembly::END_TRY_TABLE_S: if (ControlFlowStack.empty()) { printAnnotation(OS, "End marker mismatch!"); } else { @@ -251,17 +284,7 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, uint64_t Address, if (!MI->getOperand(I).isImm()) continue; } - uint64_t Depth = MI->getOperand(I).getImm(); - if (!Printed.insert(Depth).second) - continue; - if (Depth >= ControlFlowStack.size()) { - printAnnotation(OS, "Invalid depth argument!"); - } else { - const auto &Pair = ControlFlowStack.rbegin()[Depth]; - printAnnotation(OS, utostr(Depth) + ": " + - (Pair.second ? 
"up" : "down") + " to label" + - utostr(Pair.first)); - } + PrintBranchAnnotation(MI->getOperand(I), Printed); } } } diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp index aaca213c4afe..3e7487dbd8f5 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp @@ -82,14 +82,15 @@ void WebAssemblyMCCodeEmitter::encodeInstruction( // For br_table instructions, encode the size of the table. In the MCInst, // there's an index operand (if not a stack instruction), one operand for // each table entry, and the default operand. - if (MI.getOpcode() == WebAssembly::BR_TABLE_I32_S || - MI.getOpcode() == WebAssembly::BR_TABLE_I64_S) + unsigned Opcode = MI.getOpcode(); + if (Opcode == WebAssembly::BR_TABLE_I32_S || + Opcode == WebAssembly::BR_TABLE_I64_S) encodeULEB128(MI.getNumOperands() - 1, OS); - if (MI.getOpcode() == WebAssembly::BR_TABLE_I32 || - MI.getOpcode() == WebAssembly::BR_TABLE_I64) + if (Opcode == WebAssembly::BR_TABLE_I32 || + Opcode == WebAssembly::BR_TABLE_I64) encodeULEB128(MI.getNumOperands() - 2, OS); - const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); + const MCInstrDesc &Desc = MCII.get(Opcode); for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) { const MCOperand &MO = MI.getOperand(I); if (MO.isReg()) { @@ -136,7 +137,12 @@ void WebAssemblyMCCodeEmitter::encodeInstruction( encodeULEB128(uint64_t(MO.getImm()), OS); } } else { - encodeULEB128(uint64_t(MO.getImm()), OS); + // Variadic immediate operands are br_table's destination operands or + // try_table's operands (# of catch clauses, catch sub-opcodes, or catch + // clause destinations) + assert(WebAssembly::isBrTable(Opcode) || + Opcode == WebAssembly::TRY_TABLE_S); + encodeULEB128(uint32_t(MO.getImm()), OS); } } else if (MO.isSFPImm()) { @@ -146,32 +152,38 @@ void 
WebAssemblyMCCodeEmitter::encodeInstruction( uint64_t D = MO.getDFPImm(); support::endian::write<uint64_t>(OS, D, llvm::endianness::little); } else if (MO.isExpr()) { - const MCOperandInfo &Info = Desc.operands()[I]; llvm::MCFixupKind FixupKind; size_t PaddedSize = 5; - switch (Info.OperandType) { - case WebAssembly::OPERAND_I32IMM: - FixupKind = MCFixupKind(WebAssembly::fixup_sleb128_i32); - break; - case WebAssembly::OPERAND_I64IMM: - FixupKind = MCFixupKind(WebAssembly::fixup_sleb128_i64); - PaddedSize = 10; - break; - case WebAssembly::OPERAND_FUNCTION32: - case WebAssembly::OPERAND_TABLE: - case WebAssembly::OPERAND_OFFSET32: - case WebAssembly::OPERAND_SIGNATURE: - case WebAssembly::OPERAND_TYPEINDEX: - case WebAssembly::OPERAND_GLOBAL: - case WebAssembly::OPERAND_TAG: + if (I < Desc.getNumOperands()) { + const MCOperandInfo &Info = Desc.operands()[I]; + switch (Info.OperandType) { + case WebAssembly::OPERAND_I32IMM: + FixupKind = MCFixupKind(WebAssembly::fixup_sleb128_i32); + break; + case WebAssembly::OPERAND_I64IMM: + FixupKind = MCFixupKind(WebAssembly::fixup_sleb128_i64); + PaddedSize = 10; + break; + case WebAssembly::OPERAND_FUNCTION32: + case WebAssembly::OPERAND_TABLE: + case WebAssembly::OPERAND_OFFSET32: + case WebAssembly::OPERAND_SIGNATURE: + case WebAssembly::OPERAND_TYPEINDEX: + case WebAssembly::OPERAND_GLOBAL: + case WebAssembly::OPERAND_TAG: + FixupKind = MCFixupKind(WebAssembly::fixup_uleb128_i32); + break; + case WebAssembly::OPERAND_OFFSET64: + FixupKind = MCFixupKind(WebAssembly::fixup_uleb128_i64); + PaddedSize = 10; + break; + default: + llvm_unreachable("unexpected symbolic operand kind"); + } + } else { + // Variadic expr operands are try_table's catch/catch_ref clauses' tags. 
+ assert(Opcode == WebAssembly::TRY_TABLE_S); FixupKind = MCFixupKind(WebAssembly::fixup_uleb128_i32); - break; - case WebAssembly::OPERAND_OFFSET64: - FixupKind = MCFixupKind(WebAssembly::fixup_uleb128_i64); - PaddedSize = 10; - break; - default: - llvm_unreachable("unexpected symbolic operand kind"); } Fixups.push_back(MCFixup::create(OS.tell() - Start, MO.getExpr(), FixupKind, MI.getLoc())); diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 735f9dcefb97..97079cba143a 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1195,7 +1195,7 @@ private: /// instrumentation around Inst. void emitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out); - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override; @@ -1215,7 +1215,7 @@ private: uint64_t &ErrorInfo, bool MatchingInlineAsm); - bool OmitRegisterFromClobberLists(unsigned RegNo) override; + bool omitRegisterFromClobberLists(unsigned RegNo) override; /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z}) /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required. 
@@ -1290,7 +1290,7 @@ public: bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override; - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; bool ParseDirective(AsmToken DirectiveID) override; @@ -3186,7 +3186,7 @@ bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { return Parser.parsePrimaryExpr(Res, EndLoc, nullptr); } -bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, +bool X86AsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { MCAsmParser &Parser = getParser(); InstInfo = &Info; @@ -4121,7 +4121,7 @@ static unsigned getPrefixes(OperandVector &Operands) { return Result; } -bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, +bool X86AsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) { @@ -4659,7 +4659,7 @@ bool X86AsmParser::matchAndEmitIntelInstruction( MatchingInlineAsm); } -bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) { +bool X86AsmParser::omitRegisterFromClobberLists(unsigned RegNo) { return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo); } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp index 95038ccf63b8..a4b72515252a 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp @@ -1249,18 +1249,18 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::VBROADCASTF128rm: case X86::VBROADCASTI128rm: - CASE_AVX512_INS_COMMON(BROADCASTF64X2, Z128, rm) - CASE_AVX512_INS_COMMON(BROADCASTI64X2, Z128, rm) + CASE_AVX512_INS_COMMON(BROADCASTF64X2, Z256, rm) + CASE_AVX512_INS_COMMON(BROADCASTI64X2, 
Z256, rm) DecodeSubVectorBroadcast(4, 2, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; - CASE_AVX512_INS_COMMON(BROADCASTF64X2, , rm) - CASE_AVX512_INS_COMMON(BROADCASTI64X2, , rm) + CASE_AVX512_INS_COMMON(BROADCASTF64X2, Z, rm) + CASE_AVX512_INS_COMMON(BROADCASTI64X2, Z, rm) DecodeSubVectorBroadcast(8, 2, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; - CASE_AVX512_INS_COMMON(BROADCASTF64X4, , rm) - CASE_AVX512_INS_COMMON(BROADCASTI64X4, , rm) + CASE_AVX512_INS_COMMON(BROADCASTF64X4, Z, rm) + CASE_AVX512_INS_COMMON(BROADCASTI64X4, Z, rm) DecodeSubVectorBroadcast(8, 4, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -1269,13 +1269,13 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeSubVectorBroadcast(8, 4, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; - CASE_AVX512_INS_COMMON(BROADCASTF32X4, , rm) - CASE_AVX512_INS_COMMON(BROADCASTI32X4, , rm) + CASE_AVX512_INS_COMMON(BROADCASTF32X4, Z, rm) + CASE_AVX512_INS_COMMON(BROADCASTI32X4, Z, rm) DecodeSubVectorBroadcast(16, 4, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; - CASE_AVX512_INS_COMMON(BROADCASTF32X8, , rm) - CASE_AVX512_INS_COMMON(BROADCASTI32X8, , rm) + CASE_AVX512_INS_COMMON(BROADCASTF32X8, Z, rm) + CASE_AVX512_INS_COMMON(BROADCASTI32X8, Z, rm) DecodeSubVectorBroadcast(16, 8, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp index 482318311398..9c667f5036dd 100644 --- a/llvm/lib/Target/X86/X86DomainReassignment.cpp +++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp @@ -367,7 +367,7 @@ class X86DomainReassignment : public MachineFunctionPass { const X86InstrInfo *TII = nullptr; /// All edges that are included in some closure - BitVector EnclosedEdges{8, false}; + DenseMap<Register, unsigned> EnclosedEdges; /// All instructions 
that are included in some closure. DenseMap<MachineInstr *, unsigned> EnclosedInstrs; @@ -399,14 +399,16 @@ private: void buildClosure(Closure &, Register Reg); /// Enqueue \p Reg to be considered for addition to the closure. - void visitRegister(Closure &, Register Reg, RegDomain &Domain, + /// Return false if the closure becomes invalid. + bool visitRegister(Closure &, Register Reg, RegDomain &Domain, SmallVectorImpl<unsigned> &Worklist); /// Reassign the closure to \p Domain. void reassign(const Closure &C, RegDomain Domain) const; /// Add \p MI to the closure. - void encloseInstr(Closure &C, MachineInstr *MI); + /// Return false if the closure becomes invalid. + bool encloseInstr(Closure &C, MachineInstr *MI); /// /returns true if it is profitable to reassign the closure to \p Domain. bool isReassignmentProfitable(const Closure &C, RegDomain Domain) const; @@ -419,17 +421,23 @@ char X86DomainReassignment::ID = 0; } // End anonymous namespace. -void X86DomainReassignment::visitRegister(Closure &C, Register Reg, +bool X86DomainReassignment::visitRegister(Closure &C, Register Reg, RegDomain &Domain, SmallVectorImpl<unsigned> &Worklist) { if (!Reg.isVirtual()) - return; + return true; - if (EnclosedEdges.test(Register::virtReg2Index(Reg))) - return; + auto I = EnclosedEdges.find(Reg); + if (I != EnclosedEdges.end()) { + if (I->second != C.getID()) { + C.setAllIllegal(); + return false; + } + return true; + } if (!MRI->hasOneDef(Reg)) - return; + return true; RegDomain RD = getDomain(MRI->getRegClass(Reg), MRI->getTargetRegisterInfo()); // First edge in closure sets the domain. 
@@ -437,19 +445,22 @@ void X86DomainReassignment::visitRegister(Closure &C, Register Reg, Domain = RD; if (Domain != RD) - return; + return true; Worklist.push_back(Reg); + return true; } -void X86DomainReassignment::encloseInstr(Closure &C, MachineInstr *MI) { +bool X86DomainReassignment::encloseInstr(Closure &C, MachineInstr *MI) { auto I = EnclosedInstrs.find(MI); if (I != EnclosedInstrs.end()) { - if (I->second != C.getID()) + if (I->second != C.getID()) { // Instruction already belongs to another closure, avoid conflicts between // closure and mark this closure as illegal. C.setAllIllegal(); - return; + return false; + } + return true; } EnclosedInstrs[MI] = C.getID(); @@ -465,6 +476,7 @@ void X86DomainReassignment::encloseInstr(Closure &C, MachineInstr *MI) { C.setIllegal((RegDomain)i); } } + return C.hasLegalDstDomain(); } double X86DomainReassignment::calculateCost(const Closure &C, @@ -543,10 +555,11 @@ void X86DomainReassignment::buildClosure(Closure &C, Register Reg) { // Register already in this closure. if (!C.insertEdge(CurReg)) continue; - EnclosedEdges.set(Register::virtReg2Index(Reg)); + EnclosedEdges[Reg] = C.getID(); MachineInstr *DefMI = MRI->getVRegDef(CurReg); - encloseInstr(C, DefMI); + if (!encloseInstr(C, DefMI)) + return; // Add register used by the defining MI to the worklist. // Do not add registers which are used in address calculation, they will be @@ -565,7 +578,8 @@ void X86DomainReassignment::buildClosure(Closure &C, Register Reg) { auto &Op = DefMI->getOperand(OpIdx); if (!Op.isReg() || !Op.isUse()) continue; - visitRegister(C, Op.getReg(), Domain, Worklist); + if (!visitRegister(C, Op.getReg(), Domain, Worklist)) + return; } // Expand closure through register uses. @@ -574,9 +588,10 @@ void X86DomainReassignment::buildClosure(Closure &C, Register Reg) { // as this should remain in GPRs. 
if (usedAsAddr(UseMI, CurReg, TII)) { C.setAllIllegal(); - continue; + return; } - encloseInstr(C, &UseMI); + if (!encloseInstr(C, &UseMI)) + return; for (auto &DefOp : UseMI.defs()) { if (!DefOp.isReg()) @@ -585,9 +600,10 @@ void X86DomainReassignment::buildClosure(Closure &C, Register Reg) { Register DefReg = DefOp.getReg(); if (!DefReg.isVirtual()) { C.setAllIllegal(); - continue; + return; } - visitRegister(C, DefReg, Domain, Worklist); + if (!visitRegister(C, DefReg, Domain, Worklist)) + return; } } } @@ -775,7 +791,6 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; EnclosedEdges.clear(); - EnclosedEdges.resize(MRI->getNumVirtRegs()); EnclosedInstrs.clear(); std::vector<Closure> Closures; @@ -795,7 +810,7 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { continue; // Register already in closure. - if (EnclosedEdges.test(Idx)) + if (EnclosedEdges.contains(Reg)) continue; // Calculate closure starting with Reg. diff --git a/llvm/lib/Target/X86/X86FixupVectorConstants.cpp b/llvm/lib/Target/X86/X86FixupVectorConstants.cpp index c9f79e1645f5..68a4a0be3a1d 100644 --- a/llvm/lib/Target/X86/X86FixupVectorConstants.cpp +++ b/llvm/lib/Target/X86/X86FixupVectorConstants.cpp @@ -439,8 +439,8 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF, case X86::VMOVUPSZrm: return FixupConstant({{X86::VBROADCASTSSZrm, 1, 32, rebuildSplatCst}, {X86::VBROADCASTSDZrm, 1, 64, rebuildSplatCst}, - {X86::VBROADCASTF32X4rm, 1, 128, rebuildSplatCst}, - {X86::VBROADCASTF64X4rm, 1, 256, rebuildSplatCst}}, + {X86::VBROADCASTF32X4Zrm, 1, 128, rebuildSplatCst}, + {X86::VBROADCASTF64X4Zrm, 1, 256, rebuildSplatCst}}, 512, 1); /* Integer Loads */ case X86::MOVDQArm: @@ -572,12 +572,12 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF, {X86::VPBROADCASTQZrm, 1, 64, rebuildSplatCst}, {X86::VPMOVSXBQZrm, 8, 8, rebuildSExtCst}, {X86::VPMOVZXBQZrm, 8, 8, rebuildZExtCst}, - 
{X86::VBROADCASTI32X4rm, 1, 128, rebuildSplatCst}, + {X86::VBROADCASTI32X4Zrm, 1, 128, rebuildSplatCst}, {X86::VPMOVSXBDZrm, 16, 8, rebuildSExtCst}, {X86::VPMOVZXBDZrm, 16, 8, rebuildZExtCst}, {X86::VPMOVSXWQZrm, 8, 16, rebuildSExtCst}, {X86::VPMOVZXWQZrm, 8, 16, rebuildZExtCst}, - {X86::VBROADCASTI64X4rm, 1, 256, rebuildSplatCst}, + {X86::VBROADCASTI64X4Zrm, 1, 256, rebuildSplatCst}, {HasBWI ? X86::VPMOVSXBWZrm : 0, 32, 8, rebuildSExtCst}, {HasBWI ? X86::VPMOVZXBWZrm : 0, 32, 8, rebuildZExtCst}, {X86::VPMOVSXWDZrm, 16, 16, rebuildSExtCst}, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f6d42ade6008..68563f556ecf 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -12348,7 +12348,7 @@ static SDValue lowerShuffleAsElementInsertion( } V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S); } else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 || - EltVT == MVT::i16) { + (EltVT == MVT::i16 && !Subtarget.hasAVX10_2())) { // Either not inserting from the low element of the input or the input // element size is too small to use VZEXT_MOVL to clear the high bits. return SDValue(); @@ -26159,22 +26159,43 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, if (CC == ISD::SETLT || CC == ISD::SETLE) std::swap(LHS, RHS); - SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS); + // For AVX10.2, Support EQ and NE. + bool HasAVX10_2_COMX = + Subtarget.hasAVX10_2() && (CC == ISD::SETEQ || CC == ISD::SETNE); + + // AVX10.2 COMPARE supports only v2f64, v4f32 or v8f16. + // For BF type we need to fall back. + bool HasAVX10_2_COMX_Ty = (LHS.getSimpleValueType() != MVT::v8bf16); + + auto ComiOpCode = IntrData->Opc0; + auto isUnordered = (ComiOpCode == X86ISD::UCOMI); + + if (HasAVX10_2_COMX && HasAVX10_2_COMX_Ty) + ComiOpCode = isUnordered ? 
X86ISD::UCOMX : X86ISD::COMX; + + SDValue Comi = DAG.getNode(ComiOpCode, dl, MVT::i32, LHS, RHS); + SDValue SetCC; switch (CC) { - case ISD::SETEQ: { // (ZF = 0 and PF = 0) + case ISD::SETEQ: { SetCC = getSETCC(X86::COND_E, Comi, dl, DAG); + if (HasAVX10_2_COMX & HasAVX10_2_COMX_Ty) // ZF == 1 + break; + // (ZF = 1 and PF = 0) SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG); SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP); break; } - case ISD::SETNE: { // (ZF = 1 or PF = 1) + case ISD::SETNE: { SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG); + if (HasAVX10_2_COMX & HasAVX10_2_COMX_Ty) // ZF == 0 + break; + // (ZF = 0 or PF = 1) SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG); SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP); break; } - case ISD::SETGT: // (CF = 0 and ZF = 0) + case ISD::SETGT: // (CF = 0 and ZF = 0) case ISD::SETLT: { // Condition opposite to GT. Operands swapped above. SetCC = getSETCC(X86::COND_A, Comi, dl, DAG); break; @@ -34083,6 +34104,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(STRICT_FCMPS) NODE_NAME_CASE(COMI) NODE_NAME_CASE(UCOMI) + NODE_NAME_CASE(COMX) + NODE_NAME_CASE(UCOMX) NODE_NAME_CASE(CMPM) NODE_NAME_CASE(CMPMM) NODE_NAME_CASE(STRICT_CMPM) @@ -57768,6 +57791,19 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, DAG.getTargetConstant(M, DL, MVT::i8)); } break; + case X86ISD::VPERMV3: + if (IdxVal != 0) { + SDValue Src0 = InVec.getOperand(0); + SDValue Mask = InVec.getOperand(1); + SDValue Src1 = InVec.getOperand(2); + Mask = extractSubVector(Mask, IdxVal, DAG, DL, SizeInBits); + Mask = widenSubVector(Mask, /*ZeroNewElements=*/false, Subtarget, DAG, + DL, InSizeInBits); + SDValue Shuffle = + DAG.getNode(InOpcode, DL, InVecVT, Src0, Mask, Src1); + return extractSubVector(Shuffle, 0, DAG, DL, SizeInBits); + } + break; } } } diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 
5fb588675680..ae7da8efb5f9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -87,6 +87,10 @@ namespace llvm { COMI, UCOMI, + // X86 compare with Intrinsics similar to COMI. + COMX, + UCOMX, + /// X86 bit-test instructions. BT, diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index ada2bbaffd66..625f2e01d472 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -1537,3 +1537,113 @@ defm VFNMADD132NEPBF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132nepbf16", X86any_ defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_Fnmsub, X86Fnmsub, SchedWriteFMA>; } + +//------------------------------------------------- +// AVX10 COMEF instructions +//------------------------------------------------- +multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode, + string OpcodeStr, + Domain d, + X86FoldableSchedWrite sched = WriteFComX> { + let ExeDomain = d, mayRaiseFPException = 1 in { + def rr_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), + [(set EFLAGS, (OpNode (_.VT _.RC:$src1), _.RC:$src2))]>, + EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC; + let mayLoad = 1 in { + def rm_Int : AVX512<Opc, MRMSrcMem, (outs), (ins _.RC:$src1, _.ScalarMemOp:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), + [(set EFLAGS, (OpNode (_.VT _.RC:$src1), (_.LdFrag addr:$src2)))]>, + EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC; + } + def rrb_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2), + !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), + []>, + EVEX, EVEX_V128, EVEX_B, Sched<[sched]>, SIMD_EXC; + } +} + +let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in { + defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512, + "vcomxsd", SSEPackedDouble>, + TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, 
CD8VT1>; + defm VCOMXSHZ : avx10_com_ef_int<0x2f, v8f16x_info, X86comi512, + "vcomxsh", SSEPackedSingle>, + T_MAP5, XD, EVEX_CD8<16, CD8VT1>; + defm VCOMXSSZ : avx10_com_ef_int<0x2f, v4f32x_info, X86comi512, + "vcomxss", SSEPackedSingle>, + TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; + defm VUCOMXSDZ : avx10_com_ef_int<0x2e, v2f64x_info, X86ucomi512, + "vucomxsd", SSEPackedDouble>, + TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; + defm VUCOMXSHZ : avx10_com_ef_int<0x2e, v8f16x_info, X86ucomi512, + "vucomxsh", SSEPackedSingle>, + T_MAP5, XD, EVEX_CD8<16, CD8VT1>; + defm VUCOMXSSZ : avx10_com_ef_int<0x2e, v4f32x_info, X86ucomi512, + "vucomxss", SSEPackedSingle>, + TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; +} + +//------------------------------------------------- +// AVX10 MOVZXC (COPY) instructions +//------------------------------------------------- +let Predicates = [HasAVX10_2] in { + def VMOVZPDILo2PDIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), + (ins VR128X:$src), + "vmovd\t{$src, $dst|$dst, $src}", + [(set VR128X:$dst, (v4i32 (X86vzmovl + (v4i32 VR128X:$src))))]>, EVEX, + Sched<[WriteVecMoveFromGpr]>; + +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in + def VMOVZPDILo2PDIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), + (ins i32mem:$src), + "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX, + EVEX_CD8<32, CD8VT1>, + Sched<[WriteVecLoad]>; + +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in + def VMOVZPDILo2PDIZmr : AVX512PDI<0xD6, MRMDestMem, (outs), + (ins i32mem:$dst, VR128X:$src), + "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX, + EVEX_CD8<32, CD8VT1>, + Sched<[WriteVecStore]>; + +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in + def VMOVZPDILo2PDIZrr2 : AVX512PDI<0xD6, MRMSrcReg, (outs VR128X:$dst), + (ins VR128X:$src), + "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX, + Sched<[WriteVecMoveFromGpr]>; + def : InstAlias<"vmovd.s\t{$src, $dst|$dst, $src}", + (VMOVZPDILo2PDIZrr2 
VR128X:$dst, VR128X:$src), 0>; + +def VMOVZPWILo2PWIZrr : AVX512XSI<0x6E, MRMSrcReg, (outs VR128X:$dst), + (ins VR128X:$src), + "vmovw\t{$src, $dst|$dst, $src}", + [(set VR128X:$dst, (v8i16 (X86vzmovl + (v8i16 VR128X:$src))))]>, EVEX, T_MAP5, + Sched<[WriteVecMoveFromGpr]>; + +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in + def VMOVZPWILo2PWIZrm : AVX512XSI<0x6E, MRMSrcMem, (outs VR128X:$dst), + (ins i16mem:$src), + "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX, + EVEX_CD8<16, CD8VT1>, T_MAP5, + Sched<[WriteVecLoad]>; + +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in + def VMOVZPWILo2PWIZmr : AVX512XSI<0x7E, MRMDestMem, (outs), + (ins i32mem:$dst, VR128X:$src), + "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX, + EVEX_CD8<16, CD8VT1>, T_MAP5, + Sched<[WriteVecStore]>; + +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in + def VMOVZPWILo2PWIZrr2 : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), + (ins VR128X:$src), + "vmovw\t{$src, $dst|$dst, $src}", + []>, EVEX, T_MAP5, + Sched<[WriteVecMoveFromGpr]>; + def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}", + (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>; +} diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 9ed59803c1f9..928abac46da8 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1337,84 +1337,84 @@ let Predicates = [HasVLX, HasBWI] in { // AVX-512 BROADCAST SUBVECTORS // -defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", - X86SubVBroadcastld128, v16i32_info, v4i32x_info>, - EVEX_V512, EVEX_CD8<32, CD8VT4>; -defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", - X86SubVBroadcastld128, v16f32_info, v4f32x_info>, - EVEX_V512, EVEX_CD8<32, CD8VT4>; -defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4", - X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W, - EVEX_V512, 
EVEX_CD8<64, CD8VT4>; -defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4", - X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W, - EVEX_V512, EVEX_CD8<64, CD8VT4>; +defm VBROADCASTI32X4Z : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", + X86SubVBroadcastld128, v16i32_info, v4i32x_info>, + EVEX_V512, EVEX_CD8<32, CD8VT4>; +defm VBROADCASTF32X4Z : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", + X86SubVBroadcastld128, v16f32_info, v4f32x_info>, + EVEX_V512, EVEX_CD8<32, CD8VT4>; +defm VBROADCASTI64X4Z : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4", + X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W, + EVEX_V512, EVEX_CD8<64, CD8VT4>; +defm VBROADCASTF64X4Z : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4", + X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W, + EVEX_V512, EVEX_CD8<64, CD8VT4>; let Predicates = [HasAVX512] in { def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)), - (VBROADCASTF64X4rm addr:$src)>; + (VBROADCASTF64X4Zrm addr:$src)>; def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)), - (VBROADCASTF64X4rm addr:$src)>; + (VBROADCASTF64X4Zrm addr:$src)>; def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)), - (VBROADCASTF64X4rm addr:$src)>; + (VBROADCASTF64X4Zrm addr:$src)>; def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)), - (VBROADCASTI64X4rm addr:$src)>; + (VBROADCASTI64X4Zrm addr:$src)>; def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)), - (VBROADCASTI64X4rm addr:$src)>; + (VBROADCASTI64X4Zrm addr:$src)>; def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)), - (VBROADCASTI64X4rm addr:$src)>; + (VBROADCASTI64X4Zrm addr:$src)>; def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)), - (VBROADCASTI64X4rm addr:$src)>; + (VBROADCASTI64X4Zrm addr:$src)>; def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)), - (VBROADCASTF32X4rm addr:$src)>; + (VBROADCASTF32X4Zrm addr:$src)>; def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)), - (VBROADCASTF32X4rm addr:$src)>; + (VBROADCASTF32X4Zrm addr:$src)>; def 
: Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)), - (VBROADCASTF32X4rm addr:$src)>; + (VBROADCASTF32X4Zrm addr:$src)>; def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)), - (VBROADCASTI32X4rm addr:$src)>; + (VBROADCASTI32X4Zrm addr:$src)>; def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)), - (VBROADCASTI32X4rm addr:$src)>; + (VBROADCASTI32X4Zrm addr:$src)>; def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)), - (VBROADCASTI32X4rm addr:$src)>; + (VBROADCASTI32X4Zrm addr:$src)>; def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)), - (VBROADCASTI32X4rm addr:$src)>; + (VBROADCASTI32X4Zrm addr:$src)>; // Patterns for selects of bitcasted operations. def : Pat<(vselect_mask VK16WM:$mask, (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))), (v16f32 immAllZerosV)), - (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>; + (VBROADCASTF32X4Zrmkz VK16WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK16WM:$mask, (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))), VR512:$src0), - (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>; + (VBROADCASTF32X4Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK16WM:$mask, (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))), (v16i32 immAllZerosV)), - (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>; + (VBROADCASTI32X4Zrmkz VK16WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK16WM:$mask, (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))), VR512:$src0), - (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>; + (VBROADCASTI32X4Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK8WM:$mask, (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))), (v8f64 immAllZerosV)), - (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>; + (VBROADCASTF64X4Zrmkz VK8WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK8WM:$mask, (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))), VR512:$src0), - (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>; + (VBROADCASTF64X4Zrmk VR512:$src0, VK8WM:$mask, 
addr:$src)>; def : Pat<(vselect_mask VK8WM:$mask, (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))), (v8i64 immAllZerosV)), - (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>; + (VBROADCASTI64X4Zrmkz VK8WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK8WM:$mask, (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))), VR512:$src0), - (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>; + (VBROADCASTI64X4Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>; } let Predicates = [HasVLX] in { @@ -1461,9 +1461,9 @@ def : Pat<(vselect_mask VK8WM:$mask, let Predicates = [HasBF16] in { def : Pat<(v32bf16 (X86SubVBroadcastld256 addr:$src)), - (VBROADCASTF64X4rm addr:$src)>; + (VBROADCASTF64X4Zrm addr:$src)>; def : Pat<(v32bf16 (X86SubVBroadcastld128 addr:$src)), - (VBROADCASTF32X4rm addr:$src)>; + (VBROADCASTF32X4Zrm addr:$src)>; } let Predicates = [HasBF16, HasVLX] in @@ -1471,10 +1471,10 @@ let Predicates = [HasBF16, HasVLX] in (VBROADCASTF32X4Z256rm addr:$src)>; let Predicates = [HasVLX, HasDQI] in { -defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2", +defm VBROADCASTI64X2Z256 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2", X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W; -defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2", +defm VBROADCASTF64X2Z256 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2", X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W; @@ -1482,69 +1482,69 @@ defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2" def : Pat<(vselect_mask VK4WM:$mask, (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))), (v4f64 immAllZerosV)), - (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>; + (VBROADCASTF64X2Z256rmkz VK4WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK4WM:$mask, (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))), VR256X:$src0), - (VBROADCASTF64X2Z128rmk VR256X:$src0, 
VK4WM:$mask, addr:$src)>; + (VBROADCASTF64X2Z256rmk VR256X:$src0, VK4WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK4WM:$mask, (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))), (v4i64 immAllZerosV)), - (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>; + (VBROADCASTI64X2Z256rmkz VK4WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK4WM:$mask, (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))), VR256X:$src0), - (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>; + (VBROADCASTI64X2Z256rmk VR256X:$src0, VK4WM:$mask, addr:$src)>; } let Predicates = [HasDQI] in { -defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2", - X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W, - EVEX_V512, EVEX_CD8<64, CD8VT2>; -defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8", - X86SubVBroadcastld256, v16i32_info, v8i32x_info>, - EVEX_V512, EVEX_CD8<32, CD8VT8>; -defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2", - X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W, - EVEX_V512, EVEX_CD8<64, CD8VT2>; -defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8", - X86SubVBroadcastld256, v16f32_info, v8f32x_info>, - EVEX_V512, EVEX_CD8<32, CD8VT8>; +defm VBROADCASTI64X2Z : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2", + X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W, + EVEX_V512, EVEX_CD8<64, CD8VT2>; +defm VBROADCASTI32X8Z : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8", + X86SubVBroadcastld256, v16i32_info, v8i32x_info>, + EVEX_V512, EVEX_CD8<32, CD8VT8>; +defm VBROADCASTF64X2Z : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2", + X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W, + EVEX_V512, EVEX_CD8<64, CD8VT2>; +defm VBROADCASTF32X8Z : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8", + X86SubVBroadcastld256, v16f32_info, v8f32x_info>, + EVEX_V512, EVEX_CD8<32, CD8VT8>; // Patterns for selects of bitcasted operations. 
def : Pat<(vselect_mask VK16WM:$mask, (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))), (v16f32 immAllZerosV)), - (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>; + (VBROADCASTF32X8Zrmkz VK16WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK16WM:$mask, (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))), VR512:$src0), - (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>; + (VBROADCASTF32X8Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK16WM:$mask, (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))), (v16i32 immAllZerosV)), - (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>; + (VBROADCASTI32X8Zrmkz VK16WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK16WM:$mask, (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))), VR512:$src0), - (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>; + (VBROADCASTI32X8Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK8WM:$mask, (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))), (v8f64 immAllZerosV)), - (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>; + (VBROADCASTF64X2Zrmkz VK8WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK8WM:$mask, (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))), VR512:$src0), - (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>; + (VBROADCASTF64X2Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK8WM:$mask, (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))), (v8i64 immAllZerosV)), - (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>; + (VBROADCASTI64X2Zrmkz VK8WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK8WM:$mask, (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))), VR512:$src0), - (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>; + (VBROADCASTI64X2Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>; } multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr, diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index af39b1ab82d6..ed1bff05b731 
100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -61,7 +61,8 @@ def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>; def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>; def X86comi : SDNode<"X86ISD::COMI", SDTX86FCmp>; def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86FCmp>; - +def X86comi512 : SDNode<"X86ISD::COMX", SDTX86FCmp>; +def X86ucomi512 : SDNode<"X86ISD::UCOMX", SDTX86FCmp>; def SDTX86Cmps : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 57a894b09e04..38ea1f35be2b 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -6246,16 +6246,16 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { } case X86::VMOVAPSZ128rm_NOVLX: return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSrm), - get(X86::VBROADCASTF32X4rm), X86::sub_xmm); + get(X86::VBROADCASTF32X4Zrm), X86::sub_xmm); case X86::VMOVUPSZ128rm_NOVLX: return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSrm), - get(X86::VBROADCASTF32X4rm), X86::sub_xmm); + get(X86::VBROADCASTF32X4Zrm), X86::sub_xmm); case X86::VMOVAPSZ256rm_NOVLX: return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSYrm), - get(X86::VBROADCASTF64X4rm), X86::sub_ymm); + get(X86::VBROADCASTF64X4Zrm), X86::sub_ymm); case X86::VMOVUPSZ256rm_NOVLX: return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSYrm), - get(X86::VBROADCASTF64X4rm), X86::sub_ymm); + get(X86::VBROADCASTF64X4Zrm), X86::sub_ymm); case X86::VMOVAPSZ128mr_NOVLX: return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSmr), get(X86::VEXTRACTF32x4Zmri), X86::sub_xmm); diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 77ddd2366e62..55c237e2df2d 100644 --- 
a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -2051,21 +2051,21 @@ static void addConstantComments(const MachineInstr *MI, case X86::VBROADCASTF128rm: case X86::VBROADCASTI128rm: MASK_AVX512_CASE(X86::VBROADCASTF32X4Z256rm) - MASK_AVX512_CASE(X86::VBROADCASTF64X2Z128rm) + MASK_AVX512_CASE(X86::VBROADCASTF64X2Z256rm) MASK_AVX512_CASE(X86::VBROADCASTI32X4Z256rm) - MASK_AVX512_CASE(X86::VBROADCASTI64X2Z128rm) + MASK_AVX512_CASE(X86::VBROADCASTI64X2Z256rm) printBroadcast(MI, OutStreamer, 2, 128); break; - MASK_AVX512_CASE(X86::VBROADCASTF32X4rm) - MASK_AVX512_CASE(X86::VBROADCASTF64X2rm) - MASK_AVX512_CASE(X86::VBROADCASTI32X4rm) - MASK_AVX512_CASE(X86::VBROADCASTI64X2rm) + MASK_AVX512_CASE(X86::VBROADCASTF32X4Zrm) + MASK_AVX512_CASE(X86::VBROADCASTF64X2Zrm) + MASK_AVX512_CASE(X86::VBROADCASTI32X4Zrm) + MASK_AVX512_CASE(X86::VBROADCASTI64X2Zrm) printBroadcast(MI, OutStreamer, 4, 128); break; - MASK_AVX512_CASE(X86::VBROADCASTF32X8rm) - MASK_AVX512_CASE(X86::VBROADCASTF64X4rm) - MASK_AVX512_CASE(X86::VBROADCASTI32X8rm) - MASK_AVX512_CASE(X86::VBROADCASTI64X4rm) + MASK_AVX512_CASE(X86::VBROADCASTF32X8Zrm) + MASK_AVX512_CASE(X86::VBROADCASTF64X4Zrm) + MASK_AVX512_CASE(X86::VBROADCASTI32X8Zrm) + MASK_AVX512_CASE(X86::VBROADCASTI64X4Zrm) printBroadcast(MI, OutStreamer, 2, 256); break; diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td index 72fbcc559810..a5051d932d4e 100644 --- a/llvm/lib/Target/X86/X86SchedIceLake.td +++ b/llvm/lib/Target/X86/X86SchedIceLake.td @@ -1576,19 +1576,19 @@ def: InstRW<[ICXWriteResGroup121, ReadAfterVecYLd], "VBROADCASTF32X2Z256rm(b?)", "VBROADCASTF32X2Zrm(b?)", "VBROADCASTF32X4Z256rm(b?)", - "VBROADCASTF32X4rm(b?)", - "VBROADCASTF32X8rm(b?)", - "VBROADCASTF64X2Z128rm(b?)", - "VBROADCASTF64X2rm(b?)", - "VBROADCASTF64X4rm(b?)", + "VBROADCASTF32X4Zrm(b?)", + "VBROADCASTF32X8Zrm(b?)", + "VBROADCASTF64X2Z256rm(b?)", + "VBROADCASTF64X2Zrm(b?)", + 
"VBROADCASTF64X4Zrm(b?)", "VBROADCASTI32X2Z256rm(b?)", "VBROADCASTI32X2Zrm(b?)", "VBROADCASTI32X4Z256rm(b?)", - "VBROADCASTI32X4rm(b?)", - "VBROADCASTI32X8rm(b?)", - "VBROADCASTI64X2Z128rm(b?)", - "VBROADCASTI64X2rm(b?)", - "VBROADCASTI64X4rm(b?)", + "VBROADCASTI32X4Zrm(b?)", + "VBROADCASTI32X8Zrm(b?)", + "VBROADCASTI64X2Z256rm(b?)", + "VBROADCASTI64X2Zrm(b?)", + "VBROADCASTI64X4Zrm(b?)", "VBROADCASTSD(Z|Z256)rm(b?)", "VBROADCASTSS(Z|Z256)rm(b?)", "VINSERTF32x4(Z|Z256)rm(b?)", diff --git a/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/llvm/lib/Target/X86/X86SchedSapphireRapids.td index 9818f4c01ea6..6e292da4e293 100644 --- a/llvm/lib/Target/X86/X86SchedSapphireRapids.td +++ b/llvm/lib/Target/X86/X86SchedSapphireRapids.td @@ -1601,9 +1601,9 @@ def SPRWriteResGroup126 : SchedWriteRes<[SPRPort02_03_11]> { def : InstRW<[SPRWriteResGroup126], (instregex "^MMX_MOV(D|Q)64rm$", "^VBROADCAST(F|I)128rm$", "^VBROADCAST(F|I)32X(2|4)Z256rm$", - "^VBROADCAST(F|I)32X(8|2Z)rm$", - "^VBROADCAST(F|I)(32|64)X4rm$", - "^VBROADCAST(F|I)64X2((Z128)?)rm$", + "^VBROADCAST(F|I)32X(8|2)Zrm$", + "^VBROADCAST(F|I)(32|64)X4Zrm$", + "^VBROADCAST(F|I)64X2(Z|Z256)rm$", "^VBROADCASTS(DY|SZ)rm$", "^VBROADCASTS(D|S)Z256rm$", "^VBROADCASTS(DZ|SY)rm$", @@ -1652,9 +1652,9 @@ def SPRWriteResGroup131 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11]> { let Latency = 9; let NumMicroOps = 2; } -def : InstRW<[SPRWriteResGroup131], (instregex "^VBROADCAST(F|I)32X(8|2Z)rmk(z?)$", - "^VBROADCAST(F|I)(32|64)X4rmk(z?)$", - "^VBROADCAST(F|I)64X2rmk(z?)$", +def : InstRW<[SPRWriteResGroup131], (instregex "^VBROADCAST(F|I)32X(8|2)Zrmk(z?)$", + "^VBROADCAST(F|I)(32|64)X4Zrmk(z?)$", + "^VBROADCAST(F|I)64X2Zrmk(z?)$", "^VBROADCASTS(D|S)Zrmk(z?)$", "^VMOV(A|U)P(D|S)Zrmk(z?)$", "^VMOV(D|SH|SL)DUPZrmk(z?)$", @@ -2698,7 +2698,7 @@ def SPRWriteResGroup262 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> { let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup262], (instregex "^VBROADCAST(F|I)32X(2|4)Z256rmk(z?)$", - 
"^VBROADCAST(F|I)64X2Z128rmk(z?)$", + "^VBROADCAST(F|I)64X2Z256rmk(z?)$", "^VBROADCASTS(D|S)Z256rmk(z?)$", "^VMOV(A|U)P(D|S)Z256rmk(z?)$", "^VMOV(D|SH|SL)DUPZ256rmk(z?)$", diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 26e290a2250c..e733d9ac74dd 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -1547,19 +1547,19 @@ def: InstRW<[SKXWriteResGroup121, ReadAfterVecYLd], "VBROADCASTF32X2Z256rm(b?)", "VBROADCASTF32X2Zrm(b?)", "VBROADCASTF32X4Z256rm(b?)", - "VBROADCASTF32X4rm(b?)", - "VBROADCASTF32X8rm(b?)", - "VBROADCASTF64X2Z128rm(b?)", - "VBROADCASTF64X2rm(b?)", - "VBROADCASTF64X4rm(b?)", + "VBROADCASTF32X4Zrm(b?)", + "VBROADCASTF32X8Zrm(b?)", + "VBROADCASTF64X2Z256rm(b?)", + "VBROADCASTF64X2Zrm(b?)", + "VBROADCASTF64X4Zrm(b?)", "VBROADCASTI32X2Z256rm(b?)", "VBROADCASTI32X2Zrm(b?)", "VBROADCASTI32X4Z256rm(b?)", - "VBROADCASTI32X4rm(b?)", - "VBROADCASTI32X8rm(b?)", - "VBROADCASTI64X2Z128rm(b?)", - "VBROADCASTI64X2rm(b?)", - "VBROADCASTI64X4rm(b?)", + "VBROADCASTI32X4Zrm(b?)", + "VBROADCASTI32X8Zrm(b?)", + "VBROADCASTI64X2Z256rm(b?)", + "VBROADCASTI64X2Zrm(b?)", + "VBROADCASTI64X4Zrm(b?)", "VBROADCASTSD(Z|Z256)rm(b?)", "VBROADCASTSS(Z|Z256)rm(b?)", "VINSERTF32x4(Z|Z256)rm(b?)", diff --git a/llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp b/llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp index b0ce624a495f..83b1cfca529b 100644 --- a/llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp +++ b/llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp @@ -45,9 +45,9 @@ class XtensaAsmParser : public MCTargetAsmParser { ParseStatus parseDirective(AsmToken DirectiveID) override; bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; - bool 
MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override; @@ -425,7 +425,7 @@ bool XtensaAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, return true; } -bool XtensaAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, +bool XtensaAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, @@ -730,7 +730,7 @@ bool XtensaAsmParser::ParseInstructionWithSR(ParseInstructionInfo &Info, return false; } -bool XtensaAsmParser::ParseInstruction(ParseInstructionInfo &Info, +bool XtensaAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { if (Name.starts_with("wsr") || Name.starts_with("rsr") || diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 5c9faa9449f5..ea51d7790457 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -2323,6 +2323,11 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI, auto *DestVecTy = cast<FixedVectorType>(CI.getType()); Value *IntInput = CI.getOperand(0); + // if the int input is just an undef value do not try to optimize to vector + // insertions as it will prevent undef propagation + if (isa<UndefValue>(IntInput)) + return nullptr; + SmallVector<Value*, 8> Elements(DestVecTy->getNumElements()); if (!collectInsertionElements(IntInput, 0, Elements, DestVecTy->getElementType(), diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 5cdfeada7f0a..80d6ceca094d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2227,18 +2227,24 @@ 
Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp, return NewC ? new ICmpInst(Pred, X, NewC) : nullptr; } -/// Fold icmp (shl 1, Y), C. -static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl, - const APInt &C) { +/// Fold icmp (shl nuw C2, Y), C. +static Instruction *foldICmpShlLHSC(ICmpInst &Cmp, Instruction *Shl, + const APInt &C) { Value *Y; - if (!match(Shl, m_Shl(m_One(), m_Value(Y)))) + const APInt *C2; + if (!match(Shl, m_NUWShl(m_APInt(C2), m_Value(Y)))) return nullptr; Type *ShiftType = Shl->getType(); unsigned TypeBits = C.getBitWidth(); - bool CIsPowerOf2 = C.isPowerOf2(); ICmpInst::Predicate Pred = Cmp.getPredicate(); if (Cmp.isUnsigned()) { + if (C2->isZero() || C2->ugt(C)) + return nullptr; + APInt Div, Rem; + APInt::udivrem(C, *C2, Div, Rem); + bool CIsPowerOf2 = Rem.isZero() && Div.isPowerOf2(); + // (1 << Y) pred C -> Y pred Log2(C) if (!CIsPowerOf2) { // (1 << Y) < 30 -> Y <= 4 @@ -2251,9 +2257,9 @@ static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl, Pred = ICmpInst::ICMP_UGT; } - unsigned CLog2 = C.logBase2(); + unsigned CLog2 = Div.logBase2(); return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, CLog2)); - } else if (Cmp.isSigned()) { + } else if (Cmp.isSigned() && C2->isOne()) { Constant *BitWidthMinusOne = ConstantInt::get(ShiftType, TypeBits - 1); // (1 << Y) > 0 -> Y != 31 // (1 << Y) > C -> Y != 31 if C is negative. @@ -2307,7 +2313,7 @@ Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp, const APInt *ShiftAmt; if (!match(Shl->getOperand(1), m_APInt(ShiftAmt))) - return foldICmpShlOne(Cmp, Shl, C); + return foldICmpShlLHSC(Cmp, Shl, C); // Check that the shift amount is in range. If not, don't perform undefined // shifts. When the shift is visited, it will be simplified. 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index a051a568bfd6..da6f991ad4cd 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -634,6 +634,11 @@ public: Instruction *foldPHIArgZextsIntoPHI(PHINode &PN); Instruction *foldPHIArgIntToPtrToPHI(PHINode &PN); + /// If the phi is within a phi web, which is formed by the def-use chain + /// of phis and all the phis in the web are only used in the other phis. + /// In this case, these phis are dead and we will remove all of them. + bool foldDeadPhiWeb(PHINode &PN); + /// If an integer typed PHI has only one use which is an IntToPtr operation, /// replace the PHI with an existing pointer typed PHI if it exists. Otherwise /// insert a new pointer typed PHI and replace the original one. diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index bcff9a72b657..cb5c44730512 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -53,6 +53,34 @@ void InstCombinerImpl::PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN) { } } +/// If the phi is within a phi web, which is formed by the def-use chain +/// of phis and all the phis in the web are only used in the other phis. +/// In this case, these phis are dead and we will remove all of them. 
+bool InstCombinerImpl::foldDeadPhiWeb(PHINode &PN) { + SmallVector<PHINode *, 16> Stack; + SmallPtrSet<PHINode *, 16> Visited; + Stack.push_back(&PN); + while (!Stack.empty()) { + PHINode *Phi = Stack.pop_back_val(); + if (!Visited.insert(Phi).second) + continue; + // Early stop if the set of PHIs is large + if (Visited.size() == 16) + return false; + for (User *Use : Phi->users()) { + if (PHINode *PhiUse = dyn_cast<PHINode>(Use)) + Stack.push_back(PhiUse); + else + return false; + } + } + for (PHINode *Phi : Visited) + replaceInstUsesWith(*Phi, PoisonValue::get(Phi->getType())); + for (PHINode *Phi : Visited) + eraseInstFromFunction(*Phi); + return true; +} + // Replace Integer typed PHI PN if the PHI's value is used as a pointer value. // If there is an existing pointer typed PHI that produces the same value as PN, // replace PN and the IntToPtr operation with it. Otherwise, synthesize a new @@ -976,26 +1004,6 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) { return NewCI; } -/// Return true if this PHI node is only used by a PHI node cycle that is dead. -static bool isDeadPHICycle(PHINode *PN, - SmallPtrSetImpl<PHINode *> &PotentiallyDeadPHIs) { - if (PN->use_empty()) return true; - if (!PN->hasOneUse()) return false; - - // Remember this node, and if we find the cycle, return. - if (!PotentiallyDeadPHIs.insert(PN).second) - return true; - - // Don't scan crazily complex things. - if (PotentiallyDeadPHIs.size() == 16) - return false; - - if (PHINode *PU = dyn_cast<PHINode>(PN->user_back())) - return isDeadPHICycle(PU, PotentiallyDeadPHIs); - - return false; -} - /// Return true if this phi node is always equal to NonPhiInVal. /// This happens with mutually cyclic phi nodes like: /// z = some value; x = phi (y, z); y = phi (x, z) @@ -1474,27 +1482,21 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { } } - // If this is a trivial cycle in the PHI node graph, remove it. 
Basically, if - // this PHI only has a single use (a PHI), and if that PHI only has one use (a - // PHI)... break the cycle. + if (foldDeadPhiWeb(PN)) + return nullptr; + + // Optimization when the phi only has one use if (PN.hasOneUse()) { if (foldIntegerTypedPHI(PN)) return nullptr; - Instruction *PHIUser = cast<Instruction>(PN.user_back()); - if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) { - SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs; - PotentiallyDeadPHIs.insert(&PN); - if (isDeadPHICycle(PU, PotentiallyDeadPHIs)) - return replaceInstUsesWith(PN, PoisonValue::get(PN.getType())); - } - // If this phi has a single use, and if that use just computes a value for // the next iteration of a loop, delete the phi. This occurs with unused // induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this // common case here is good because the only other things that catch this // are induction variable analysis (sometimes) and ADCE, which is only run // late. + Instruction *PHIUser = cast<Instruction>(PN.user_back()); if (PHIUser->hasOneUse() && (isa<BinaryOperator>(PHIUser) || isa<UnaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) && diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp index fc78d8c60ec0..e76689e2f5f0 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/Analysis.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" @@ -34,6 +35,7 @@ #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" #include "llvm/Transforms/Scalar/DCE.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include <deque> using namespace llvm; @@ -51,6 +53,11 @@ class ProfileAnnotator final { class BBInfo { 
std::optional<uint64_t> Count; + // OutEdges is dimensioned to match the number of terminator operands. + // Entries in the vector match the index in the terminator operand list. In + // some cases - see `shouldExcludeEdge` and its implementation - an entry + // will be nullptr. + // InEdges doesn't have the above constraint. SmallVector<EdgeInfo *> OutEdges; SmallVector<EdgeInfo *> InEdges; size_t UnknownCountOutEdges = 0; @@ -58,22 +65,30 @@ class ProfileAnnotator final { // Pass AssumeAllKnown when we try to propagate counts from edges to BBs - // because all the edge counters must be known. - uint64_t getEdgeSum(const SmallVector<EdgeInfo *> &Edges, - bool AssumeAllKnown) const { - uint64_t Sum = 0; - for (const auto *E : Edges) - if (E) - Sum += AssumeAllKnown ? *E->Count : E->Count.value_or(0U); + // Return std::nullopt if there were no edges to sum. The user can decide + // how to interpret that. + std::optional<uint64_t> getEdgeSum(const SmallVector<EdgeInfo *> &Edges, + bool AssumeAllKnown) const { + std::optional<uint64_t> Sum; + for (const auto *E : Edges) { + // `Edges` may be `OutEdges`, case in which `E` could be nullptr. + if (E) { + if (!Sum.has_value()) + Sum = 0; + *Sum += (AssumeAllKnown ? *E->Count : E->Count.value_or(0U)); + } + } return Sum; } - void computeCountFrom(const SmallVector<EdgeInfo *> &Edges) { + bool computeCountFrom(const SmallVector<EdgeInfo *> &Edges) { assert(!Count.has_value()); Count = getEdgeSum(Edges, true); + return Count.has_value(); } void setSingleUnknownEdgeCount(SmallVector<EdgeInfo *> &Edges) { - uint64_t KnownSum = getEdgeSum(Edges, false); + uint64_t KnownSum = getEdgeSum(Edges, false).value_or(0U); uint64_t EdgeVal = *Count > KnownSum ? 
*Count - KnownSum : 0U; EdgeInfo *E = nullptr; for (auto *I : Edges) @@ -110,17 +125,15 @@ class ProfileAnnotator final { } bool tryTakeCountFromKnownOutEdges(const BasicBlock &BB) { - if (!succ_empty(&BB) && !UnknownCountOutEdges) { - computeCountFrom(OutEdges); - return true; + if (!UnknownCountOutEdges) { + return computeCountFrom(OutEdges); } return false; } bool tryTakeCountFromKnownInEdges(const BasicBlock &BB) { - if (!BB.isEntryBlock() && !UnknownCountInEdges) { - computeCountFrom(InEdges); - return true; + if (!UnknownCountInEdges) { + return computeCountFrom(InEdges); } return false; } @@ -178,7 +191,7 @@ class ProfileAnnotator final { bool KeepGoing = true; while (KeepGoing) { KeepGoing = false; - for (const auto &BB : reverse(F)) { + for (const auto &BB : F) { auto &Info = getBBInfo(BB); if (!Info.hasCount()) KeepGoing |= Info.tryTakeCountFromKnownOutEdges(BB) || @@ -198,6 +211,52 @@ class ProfileAnnotator final { BBInfo &getBBInfo(const BasicBlock &BB) { return BBInfos.find(&BB)->second; } + const BBInfo &getBBInfo(const BasicBlock &BB) const { + return BBInfos.find(&BB)->second; + } + + // validation function after we propagate the counters: all BBs and edges' + // counters must have a value. + bool allCountersAreAssigned() const { + for (const auto &BBInfo : BBInfos) + if (!BBInfo.second.hasCount()) + return false; + for (const auto &EdgeInfo : EdgeInfos) + if (!EdgeInfo.Count.has_value()) + return false; + return true; + } + + /// Check that all paths from the entry basic block that use edges with + /// non-zero counts arrive at a basic block with no successors (i.e. 
"exit") + bool allTakenPathsExit() const { + std::deque<const BasicBlock *> Worklist; + DenseSet<const BasicBlock *> Visited; + Worklist.push_back(&F.getEntryBlock()); + Visited.insert(&F.getEntryBlock()); + while (!Worklist.empty()) { + const auto *BB = Worklist.front(); + Worklist.pop_front(); + if (succ_size(BB) <= 1) + continue; + const auto &BBInfo = getBBInfo(*BB); + bool Inserted = false; + for (auto I = 0U; I < BB->getTerminator()->getNumSuccessors(); ++I) { + const auto *Succ = BB->getTerminator()->getSuccessor(I); + if (!shouldExcludeEdge(*BB, *Succ)) { + if (BBInfo.getEdgeCount(I) > 0) + if (Visited.insert(Succ).second) { + Worklist.push_back(Succ); + Inserted = true; + } + } + } + if (!Inserted) + return false; + } + return true; + } + public: ProfileAnnotator(Function &F, const SmallVectorImpl<uint64_t> &Counters, InstrProfSummaryBuilder &PB) @@ -216,6 +275,9 @@ public: "profile is managed by IPO transforms"); (void)Index; Count = Counters[Ins->getIndex()->getZExtValue()]; + } else if (isa<UnreachableInst>(BB.getTerminator())) { + // The program presumably didn't crash. + Count = 0; } auto [It, Ins] = BBInfos.insert({&BB, {pred_size(&BB), succ_size(&BB), Count}}); @@ -268,14 +330,16 @@ public: PB.addInternalCount(EdgeCount); } - if (MaxCount == 0) - F.getContext().emitError( - "[ctx-prof] Encountered a BB with more than one successor, where " - "all outgoing edges have a 0 count. This occurs in non-exiting " - "functions (message pumps, usually) which are not supported in the " - "contextual profiling case"); - setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount); + if (MaxCount != 0) + setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount); } + assert(allCountersAreAssigned() && + "Expected all counters have been assigned."); + assert(allTakenPathsExit() && + "[ctx-prof] Encountered a BB with more than one successor, where " + "all outgoing edges have a 0 count. 
This occurs in non-exiting " + "functions (message pumps, usually) which are not supported in the " + "contextual profiling case"); } }; diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index b0da19813f0a..f64488832875 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -1270,6 +1270,18 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyWritesMemory(F); Changed |= setWillReturn(F); break; + case LibFunc_sincos: + case LibFunc_sincosf: + case LibFunc_sincosl: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotFreeMemory(F); + Changed |= setOnlyWritesMemory(F); + Changed |= setOnlyWritesMemory(F, 1); + Changed |= setOnlyWritesMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setWillReturn(F); + break; default: // FIXME: It'd be really nice to cover all the library functions we're // aware of here. diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index c63618d9dd12..09461e65e2dc 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7885,6 +7885,13 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu case Instruction::Call: case Instruction::CallBr: case Instruction::Invoke: + case Instruction::UDiv: + case Instruction::URem: + // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not + // implemented to avoid code complexity as it is unclear how useful such + // logic is. 
+ case Instruction::SDiv: + case Instruction::SRem: return true; } }); @@ -7986,6 +7993,9 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu } } } + // Div/Rem by zero is immediate UB + if (match(Use, m_BinOp(m_Value(), m_Specific(I))) && Use->isIntDivRem()) + return true; } return false; } diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 917f81863cf6..6799d333fb28 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -2796,6 +2796,35 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) { return copyFlags(*CI, FabsCall); } +Value *LibCallSimplifier::optimizeFMod(CallInst *CI, IRBuilderBase &B) { + SimplifyQuery SQ(DL, TLI, DT, AC, CI, true, true, DC); + + // fmod(x,y) can set errno if y == 0 or x == +/-inf, and returns Nan in those + // case. If we know those do not happen, then we can convert the fmod into + // frem. 
+ bool IsNoNan = CI->hasNoNaNs(); + if (!IsNoNan) { + KnownFPClass Known0 = computeKnownFPClass(CI->getOperand(0), fcInf, + /*Depth=*/0, SQ); + if (Known0.isKnownNeverInfinity()) { + KnownFPClass Known1 = + computeKnownFPClass(CI->getOperand(1), fcZero | fcSubnormal, + /*Depth=*/0, SQ); + Function *F = CI->getParent()->getParent(); + if (Known1.isKnownNeverLogicalZero(*F, CI->getType())) + IsNoNan = true; + } + } + + if (IsNoNan) { + Value *FRem = B.CreateFRemFMF(CI->getOperand(0), CI->getOperand(1), CI); + if (auto *FRemI = dyn_cast<Instruction>(FRem)) + FRemI->setHasNoNaNs(true); + return FRem; + } + return nullptr; +} + Value *LibCallSimplifier::optimizeTrigInversionPairs(CallInst *CI, IRBuilderBase &B) { Module *M = CI->getModule(); @@ -3945,6 +3974,10 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI, case LibFunc_sqrt: case LibFunc_sqrtl: return optimizeSqrt(CI, Builder); + case LibFunc_fmod: + case LibFunc_fmodf: + case LibFunc_fmodl: + return optimizeFMod(CI, Builder); case LibFunc_logf: case LibFunc_log: case LibFunc_logl: diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt index 59d04ac3cecd..f33906b05fed 100644 --- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt +++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_component_library(LLVMVectorize LoopVectorize.cpp SandboxVectorizer/DependencyGraph.cpp SandboxVectorizer/Passes/BottomUpVec.cpp + SandboxVectorizer/Region.cpp SandboxVectorizer/SandboxVectorizer.cpp SLPVectorizer.cpp Vectorize.cpp diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0fa7c2add1fa..9fb684427cfe 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -905,15 +905,6 @@ Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF) { return B.CreateElementCount(Ty, VF); } -const SCEV *createTripCountSCEV(Type 
*IdxTy, PredicatedScalarEvolution &PSE, - Loop *OrigLoop) { - const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount(); - assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && "Invalid loop count"); - - ScalarEvolution &SE = *PSE.getSE(); - return SE.getTripCountFromExitCount(BackedgeTakenCount, IdxTy, OrigLoop); -} - void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, @@ -4750,7 +4741,10 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor( if (!MainLoopVF.isScalable() && !NextVF.Width.isScalable()) { // TODO: extend to support scalable VFs. if (!RemainingIterations) { - const SCEV *TC = createTripCountSCEV(TCType, PSE, OrigLoop); + const SCEV *TC = vputils::getSCEVExprForVPValue( + getPlanFor(NextVF.Width).getTripCount(), SE); + assert(!isa<SCEVCouldNotCompute>(TC) && + "Trip count SCEV must be computable"); RemainingIterations = SE.getURemExpr( TC, SE.getConstant(TCType, MainLoopVF.getKnownMinValue() * IC)); } @@ -8863,10 +8857,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { return !CM.requiresScalarEpilogue(VF.isVector()); }, Range); - VPlanPtr Plan = VPlan::createInitialVPlan( - createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop), - *PSE.getSE(), RequiresScalarEpilogueCheck, CM.foldTailByMasking(), - OrigLoop); + VPlanPtr Plan = VPlan::createInitialVPlan(Legal->getWidestInductionType(), + PSE, RequiresScalarEpilogueCheck, + CM.foldTailByMasking(), OrigLoop); // Don't use getDecisionAndClampRange here, because we don't know the UF // so this function is better to be conservative, rather than to split @@ -9081,9 +9074,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { assert(EnableVPlanNativePath && "VPlan-native path is not enabled."); // Create new empty VPlan - auto Plan = VPlan::createInitialVPlan( - createTripCountSCEV(Legal->getWidestInductionType(), PSE, 
OrigLoop), - *PSE.getSE(), true, false, OrigLoop); + auto Plan = VPlan::createInitialVPlan(Legal->getWidestInductionType(), PSE, + true, false, OrigLoop); // Build hierarchical CFG VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp index c4870b70fd52..0c44d05f0474 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp @@ -7,7 +7,58 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h" +#include "llvm/ADT/SmallVector.h" using namespace llvm::sandboxir; -bool BottomUpVec::runOnFunction(Function &F) { return false; } +namespace llvm::sandboxir { +// TODO: This is a temporary function that returns some seeds. +// Replace this with SeedCollector's function when it lands. 
+static llvm::SmallVector<Value *, 4> collectSeeds(BasicBlock &BB) { + llvm::SmallVector<Value *, 4> Seeds; + for (auto &I : BB) + if (auto *SI = llvm::dyn_cast<StoreInst>(&I)) + Seeds.push_back(SI); + return Seeds; +} + +static SmallVector<Value *, 4> getOperand(ArrayRef<Value *> Bndl, + unsigned OpIdx) { + SmallVector<Value *, 4> Operands; + for (Value *BndlV : Bndl) { + auto *BndlI = cast<Instruction>(BndlV); + Operands.push_back(BndlI->getOperand(OpIdx)); + } + return Operands; +} + +} // namespace llvm::sandboxir + +void BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl) { + auto LegalityRes = Legality.canVectorize(Bndl); + switch (LegalityRes.getSubclassID()) { + case LegalityResultID::Widen: { + auto *I = cast<Instruction>(Bndl[0]); + for (auto OpIdx : seq<unsigned>(I->getNumOperands())) { + auto OperandBndl = getOperand(Bndl, OpIdx); + vectorizeRec(OperandBndl); + } + break; + } + } +} + +void BottomUpVec::tryVectorize(ArrayRef<Value *> Bndl) { vectorizeRec(Bndl); } + +bool BottomUpVec::runOnFunction(Function &F) { + Change = false; + // TODO: Start from innermost BBs first + for (auto &BB : F) { + // TODO: Replace with proper SeedCollector function. + auto Seeds = collectSeeds(BB); + // TODO: Slice Seeds into smaller chunks. + if (Seeds.size() >= 2) + tryVectorize(Seeds); + } + return Change; +} diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Region.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Region.cpp new file mode 100644 index 000000000000..34aa9f3786f3 --- /dev/null +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Region.cpp @@ -0,0 +1,45 @@ +//===- Region.cpp ---------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Vectorize/SandboxVectorizer/Region.h" + +namespace llvm::sandboxir { + +Region::Region(Context &Ctx) : Ctx(Ctx) { + static unsigned StaticRegionID; + RegionID = StaticRegionID++; +} + +Region::~Region() {} + +void Region::add(Instruction *I) { Insts.insert(I); } + +void Region::remove(Instruction *I) { Insts.remove(I); } + +#ifndef NDEBUG +bool Region::operator==(const Region &Other) const { + if (Insts.size() != Other.Insts.size()) + return false; + if (!std::is_permutation(Insts.begin(), Insts.end(), Other.Insts.begin())) + return false; + return true; +} + +void Region::dump(raw_ostream &OS) const { + OS << "RegionID: " << getID() << "\n"; + for (auto *I : Insts) + OS << *I << "\n"; +} + +void Region::dump() const { + dump(dbgs()); + dbgs() << "\n"; +} +#endif // NDEBUG + +} // namespace llvm::sandboxir diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 41e281f3fa99..2169d78542cb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -869,14 +869,23 @@ static VPIRBasicBlock *createVPIRBasicBlockFor(BasicBlock *BB) { return VPIRBB; } -VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE, +VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, + PredicatedScalarEvolution &PSE, bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) { VPIRBasicBlock *Entry = createVPIRBasicBlockFor(TheLoop->getLoopPreheader()); VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph"); auto Plan = std::make_unique<VPlan>(Entry, VecPreheader); + + // Create SCEV and VPValue for the trip count. 
+ const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount(); + assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && "Invalid loop count"); + ScalarEvolution &SE = *PSE.getSE(); + const SCEV *TripCount = + SE.getTripCountFromExitCount(BackedgeTakenCount, InductionTy, TheLoop); Plan->TripCount = vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE); + // Create VPRegionBlock, with empty header and latch blocks, to be filled // during processing later. VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body"); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index eac4fe8ce8b0..9b9e710ddc88 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -83,9 +83,6 @@ Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF); Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step); -const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE, - Loop *CurLoop = nullptr); - /// A helper function that returns the reciprocal of the block probability of /// predicated blocks. If we return X, we are assuming the predicated block /// will execute once for every X iterations of the loop header. @@ -3477,8 +3474,10 @@ public: /// middle VPBasicBlock. If a check is needed to guard executing the scalar /// epilogue loop, it will be added to the middle block, together with /// VPBasicBlocks for the scalar preheader and exit blocks. - static VPlanPtr createInitialVPlan(const SCEV *TripCount, - ScalarEvolution &PSE, + /// \p InductionTy is the type of the canonical induction and used for related + /// values, like the trip count expression. 
+ static VPlanPtr createInitialVPlan(Type *InductionTy, + PredicatedScalarEvolution &PSE, bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop); diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index f091ee5a71b2..277df0637372 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -11,6 +11,7 @@ #include "VPlanCFG.h" #include "VPlanDominatorTree.h" #include "llvm/ADT/TypeSwitch.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/GenericDomTreeConstruction.h" diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 9068ccf519c5..c077e2b4eac5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1353,9 +1353,9 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { - O << Indent << "WIDEN-VP "; + O << Indent << "WIDEN "; printAsOperand(O, SlotTracker); - O << " = " << Instruction::getOpcodeName(getOpcode()); + O << " = vp." 
<< Instruction::getOpcodeName(getOpcode()); printFlags(O); printOperands(O, SlotTracker); } @@ -2941,10 +2941,9 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { "scalar step must be the same across all parts"); Value *GEP = State.Builder.CreateGEP( State.Builder.getInt8Ty(), NewPointerPhi, - State.Builder.CreateMul( - StartOffset, - State.Builder.CreateVectorSplat(State.VF, ScalarStepValue), - "vector.gep")); + State.Builder.CreateMul(StartOffset, State.Builder.CreateVectorSplat( + State.VF, ScalarStepValue)), + "vector.gep"); State.set(this, GEP, Part); } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 1d8455001001..edcd7d26e60d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -685,10 +685,11 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, m_BranchOnCond(m_Not(m_ActiveLaneMask(m_VPValue(), m_VPValue()))))) return; - Type *IdxTy = - Plan.getCanonicalIV()->getStartValue()->getLiveInIRValue()->getType(); - const SCEV *TripCount = createTripCountSCEV(IdxTy, PSE); ScalarEvolution &SE = *PSE.getSE(); + const SCEV *TripCount = + vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE); + assert(!isa<SCEVCouldNotCompute>(TripCount) && + "Trip count SCEV must be computable"); ElementCount NumElements = BestVF.multiplyCoefficientBy(BestUF); const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements); if (TripCount->isZero() || diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index c18bea4f4c59..414f8866d24f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -8,6 +8,7 @@ #include "VPlanUtils.h" #include "VPlanPatternMatch.h" +#include "llvm/ADT/TypeSwitch.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" using namespace llvm; @@ -60,3 +61,14 @@ 
bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) { return match(V, m_Binary<Instruction::ICmp>(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) && B == Plan.getOrCreateBackedgeTakenCount(); } + +const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) { + if (V->isLiveIn()) + return SE.getSCEV(V->getLiveInIRValue()); + + // TODO: Support constructing SCEVs for more recipes as needed. + return TypeSwitch<const VPRecipeBase *, const SCEV *>(V->getDefiningRecipe()) + .Case<VPExpandSCEVRecipe>( + [](const VPExpandSCEVRecipe *R) { return R->getSCEV(); }) + .Default([&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); }); +} diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h index fc11208a4339..7b5d4300655f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h @@ -11,6 +11,11 @@ #include "VPlan.h" +namespace llvm { +class ScalarEvolution; +class SCEV; +} // namespace llvm + namespace llvm::vputils { /// Returns true if only the first lane of \p Def is used. bool onlyFirstLaneUsed(const VPValue *Def); @@ -26,6 +31,10 @@ bool onlyFirstPartUsed(const VPValue *Def); VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, ScalarEvolution &SE); +/// Return the SCEV expression for \p V. Returns SCEVCouldNotCompute if no +/// SCEV expression could be constructed. +const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE); + /// Returns true if \p VPV is uniform after vectorization. 
inline bool isUniformAfterVectorization(const VPValue *VPV) { // A value defined outside the vector region must be uniform after diff --git a/llvm/test/Analysis/CostModel/RISCV/fround.ll b/llvm/test/Analysis/CostModel/RISCV/fround.ll index dc501b82417d..b4740f223eca 100644 --- a/llvm/test/Analysis/CostModel/RISCV/fround.ll +++ b/llvm/test/Analysis/CostModel/RISCV/fround.ll @@ -233,10 +233,10 @@ define void @trunc_fp16() { ; ; ZVFHMIN-LABEL: 'trunc_fp16' ; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = call half @llvm.trunc.f16(half undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %2 = call <2 x half> @llvm.trunc.v2f16(<2 x half> undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %3 = call <4 x half> @llvm.trunc.v4f16(<4 x half> undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %4 = call <8 x half> @llvm.trunc.v8f16(<8 x half> undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 159 for instruction: %5 = call <16 x half> @llvm.trunc.v16f16(<16 x half> undef) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.trunc.v2f16(<2 x half> undef) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.trunc.v4f16(<4 x half> undef) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.trunc.v8f16(<8 x half> undef) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x half> @llvm.trunc.v16f16(<16 x half> undef) ; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.trunc.nxv1f16(<vscale x 1 x half> undef) ; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.trunc.nxv2f16(<vscale x 2 x half> undef) ; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 
1 for instruction: %8 = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> undef) @@ -1108,10 +1108,10 @@ define void @vp_roundtozero_f16() { ; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; ZVFHMIN-LABEL: 'vp_roundtozero_f16' -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %1 = call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %2 = call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %3 = call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %4 = call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef) ; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.roundtozero.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef) ; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.roundtozero.nxv2f16(<vscale x 2 x half> undef, <vscale x 
2 x i1> undef, i32 undef) ; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.roundtozero.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef) diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll new file mode 100644 index 000000000000..7eea1c36afc3 --- /dev/null +++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll @@ -0,0 +1,55 @@ +; Check that flattened profile lowering handles cold subgraphs that end in "unreachable" +; RUN: split-file %s %t +; RUN: llvm-ctxprof-util fromJSON --input=%t/profile.json --output=%t/profile.ctxprofdata +; RUN: opt -passes=ctx-prof-flatten %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s + +; CHECK-LABEL: entry: +; CHECK: br i1 %t, label %yes, label %no, !prof ![[C1:[0-9]+]] +; CHECK-LABEL: no: +; CHECK-NOT: !prof +; CHECK-LABEL: no1: +; CHECK-NOT: !prof +; CHECK-LABEL: no2: +; CHECK-NOT: !prof +; CHECK-LABEL: yes: +; CHECK: br i1 %t3, label %yes1, label %yes2, !prof ![[C1]] +; CHECK-NOT: !prof +; CHECK: ![[C1]] = !{!"branch_weights", i32 6, i32 0} + +;--- example.ll +define void @f1(i32 %cond) !guid !0 { +entry: + call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 0) + %t = icmp eq i32 %cond, 1 + br i1 %t, label %yes, label %no + +no: + %t2 = icmp eq i32 %cond, 2 + br i1 %t2, label %no1, label %no2 +no1: + unreachable +no2: + call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 1) + unreachable +yes: + %t3 = icmp eq i32 %cond, 3 + br i1 %t3, label %yes1, label %yes2 +yes1: + br label %exit +yes2: + call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 2) + %t4 = icmp eq i32 %cond, 4 + br i1 %t4, label %yes3, label %yes4 +yes3: + br label %exit +yes4: + call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 3) + unreachable +exit: + ret void +} + +!0 = !{i64 1234} + +;--- profile.json +[{"Guid":1234, 
"Counters":[6,0,0,0]}] diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll index 3135addec161..c68138acc9b2 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll @@ -965,11 +965,7 @@ define amdgpu_kernel void @v_insertelement_v8bf16_3(ptr addrspace(1) %out, ptr a ; GFX900-NEXT: v_mov_b32_e32 v5, 0x5040100 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] -; GFX900-NEXT: s_mov_b32 s2, 0xffff ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_bfi_b32 v3, s2, v3, v3 -; GFX900-NEXT: v_bfi_b32 v2, s2, v2, v2 -; GFX900-NEXT: v_bfi_b32 v0, s2, v0, v0 ; GFX900-NEXT: v_perm_b32 v1, s4, v1, v5 ; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX900-NEXT: s_endpgm @@ -980,14 +976,10 @@ define amdgpu_kernel void @v_insertelement_v8bf16_3(ptr addrspace(1) %out, ptr a ; GFX940-NEXT: s_load_dword s0, s[2:3], 0x10 ; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX940-NEXT: v_lshlrev_b32_e32 v4, 4, v0 -; GFX940-NEXT: s_mov_b32 s1, 0xffff +; GFX940-NEXT: v_mov_b32_e32 v5, 0x5040100 ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: global_load_dwordx4 v[0:3], v4, s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v5, 0x5040100 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_bfi_b32 v3, s1, v3, v3 -; GFX940-NEXT: v_bfi_b32 v2, s1, v2, v2 -; GFX940-NEXT: v_bfi_b32 v0, s1, v0, v0 ; GFX940-NEXT: v_perm_b32 v1, s0, v1, v5 ; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] sc0 sc1 ; GFX940-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir b/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir index dab4c9d40140..d21dbd290acc 100644 --- a/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir +++ b/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir @@ -73,13 +73,13 @@ body: | ; GCN-NEXT: successors: %bb.2(0x80000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[S_ADD_U:%[0-9]+]]:sreg_64 = 
S_ADD_U64_PSEUDO [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[S_ADD_U]].sub0, implicit $exec + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[S_ADD_U]], implicit $exec ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: ; GCN-NEXT: successors: %bb.3(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[V_MOV_B64_e32_]].sub0, %bb.3, [[COPY2]], %bb.1 + ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[V_MOV_B64_e32_]].sub0, %bb.3, [[COPY2]].sub0, %bb.1 ; GCN-NEXT: S_BRANCH %bb.3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: diff --git a/llvm/test/CodeGen/DirectX/countbits.ll b/llvm/test/CodeGen/DirectX/countbits.ll new file mode 100644 index 000000000000..c6bc2b679094 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/countbits.ll @@ -0,0 +1,47 @@ +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + +; Make sure dxil operation function calls for countbits are generated for all integer types. 
+ +define noundef i16 @test_countbits_short(i16 noundef %a) { +entry: +; CHECK: call i16 @dx.op.unary.i16(i32 31, i16 %{{.*}}) + %elt.ctpop = call i16 @llvm.ctpop.i16(i16 %a) + ret i16 %elt.ctpop +} + +define noundef i32 @test_countbits_int(i32 noundef %a) { +entry: +; CHECK: call i32 @dx.op.unary.i32(i32 31, i32 %{{.*}}) + %elt.ctpop = call i32 @llvm.ctpop.i32(i32 %a) + ret i32 %elt.ctpop +} + +define noundef i64 @test_countbits_long(i64 noundef %a) { +entry: +; CHECK: call i64 @dx.op.unary.i64(i32 31, i64 %{{.*}}) + %elt.ctpop = call i64 @llvm.ctpop.i64(i64 %a) + ret i64 %elt.ctpop +} + +define noundef <4 x i32> @countbits_vec4_i32(<4 x i32> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unary.i32(i32 31, i32 [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unary.i32(i32 31, i32 [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unary.i32(i32 31, i32 [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unary.i32(i32 31, i32 [[ee3]]) + ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie3]], i64 3 + %2 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a) + ret <4 x i32> %2 +} + +declare i16 @llvm.ctpop.i16(i16) +declare i32 @llvm.ctpop.i32(i32) +declare i64 @llvm.ctpop.i64(i64) +declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) diff --git a/llvm/test/CodeGen/NVPTX/vector-loads.ll b/llvm/test/CodeGen/NVPTX/vector-loads.ll index 9322b9e0fe6c..f582ebc166dd 100644 --- a/llvm/test/CodeGen/NVPTX/vector-loads.ll +++ b/llvm/test/CodeGen/NVPTX/vector-loads.ll @@ -198,3 +198,12 @@ define void @extv8f16_generic_a4(ptr noalias readonly align 16 %dst, 
ptr noalias !1 = !{i32 0, i32 64} + +; CHECK-LABEL: bf16_v4_align_load_store +define dso_local void @bf16_v4_align_load_store(ptr noundef %0, ptr noundef %1) #0 { + ; CHECK: ld.v4.b16 + ; CHECK: st.v4.b16 + %3 = load <4 x bfloat>, ptr %1, align 8 + store <4 x bfloat> %3, ptr %0, align 8 + ret void +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-intrinsics.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-intrinsics.ll new file mode 100644 index 000000000000..ad461f8f24b9 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-intrinsics.ll @@ -0,0 +1,264 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -global-isel -mattr=+d \ +; RUN: -verify-machineinstrs -target-abi=ilp32d \ +; RUN: | FileCheck -check-prefixes=CHECKIFD,RV32IFD %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -global-isel -mattr=+d \ +; RUN: -verify-machineinstrs -target-abi=lp64d \ +; RUN: | FileCheck -check-prefixes=CHECKIFD,RV64IFD %s + +declare double @llvm.sqrt.f64(double) + +define double @sqrt_f64(double %a) nounwind { +; CHECKIFD-LABEL: sqrt_f64: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fsqrt.d fa0, fa0 +; CHECKIFD-NEXT: ret + %1 = call double @llvm.sqrt.f64(double %a) + ret double %1 +} + +declare double @llvm.fma.f64(double, double, double) + +define double @fma_f64(double %a, double %b, double %c) nounwind { +; CHECKIFD-LABEL: fma_f64: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fmadd.d fa0, fa0, fa1, fa2 +; CHECKIFD-NEXT: ret + %1 = call double @llvm.fma.f64(double %a, double %b, double %c) + ret double %1 +} + +declare double @llvm.fmuladd.f64(double, double, double) + +define double @fmuladd_f64(double %a, double %b, double %c) nounwind { +; CHECKIFD-LABEL: fmuladd_f64: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fmadd.d fa0, fa0, fa1, fa2 +; CHECKIFD-NEXT: ret + %1 = call double @llvm.fmuladd.f64(double %a, double %b, double %c) + ret double %1 +} + +declare double 
@llvm.fabs.f64(double) + +define double @fabs_f64(double %a) nounwind { +; CHECKIFD-LABEL: fabs_f64: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fabs.d fa0, fa0 +; CHECKIFD-NEXT: ret + %1 = call double @llvm.fabs.f64(double %a) + ret double %1 +} + +declare double @llvm.minnum.f64(double, double) + +define double @minnum_f64(double %a, double %b) nounwind { +; CHECKIFD-LABEL: minnum_f64: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fmin.d fa0, fa0, fa1 +; CHECKIFD-NEXT: ret + %1 = call double @llvm.minnum.f64(double %a, double %b) + ret double %1 +} + +declare double @llvm.maxnum.f64(double, double) + +define double @maxnum_f64(double %a, double %b) nounwind { +; CHECKIFD-LABEL: maxnum_f64: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fmax.d fa0, fa0, fa1 +; CHECKIFD-NEXT: ret + %1 = call double @llvm.maxnum.f64(double %a, double %b) + ret double %1 +} + +declare double @llvm.copysign.f64(double, double) + +define double @copysign_f64(double %a, double %b) nounwind { +; CHECKIFD-LABEL: copysign_f64: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fsgnj.d fa0, fa0, fa1 +; CHECKIFD-NEXT: ret + %1 = call double @llvm.copysign.f64(double %a, double %b) + ret double %1 +} + +declare double @llvm.floor.f64(double) + +define double @floor_f64(double %a) nounwind { +; RV32IFD-LABEL: floor_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call floor +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: floor_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call floor +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret + %1 = call double @llvm.floor.f64(double %a) + ret double %1 +} + +declare double @llvm.ceil.f64(double) + +define double @ceil_f64(double %a) nounwind { +; RV32IFD-LABEL: ceil_f64: +; RV32IFD: # 
%bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call ceil +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: ceil_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call ceil +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret + %1 = call double @llvm.ceil.f64(double %a) + ret double %1 +} + +declare double @llvm.trunc.f64(double) + +define double @trunc_f64(double %a) nounwind { +; RV32IFD-LABEL: trunc_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call trunc +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: trunc_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call trunc +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret + %1 = call double @llvm.trunc.f64(double %a) + ret double %1 +} + +declare double @llvm.rint.f64(double) + +define double @rint_f64(double %a) nounwind { +; RV32IFD-LABEL: rint_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call rint +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: rint_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call rint +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret + %1 = call double @llvm.rint.f64(double %a) + ret double %1 +} + +declare double 
@llvm.nearbyint.f64(double) + +define double @nearbyint_f64(double %a) nounwind { +; RV32IFD-LABEL: nearbyint_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call nearbyint +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: nearbyint_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call nearbyint +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret + %1 = call double @llvm.nearbyint.f64(double %a) + ret double %1 +} + +declare double @llvm.round.f64(double) + +define double @round_f64(double %a) nounwind { +; RV32IFD-LABEL: round_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call round +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: round_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call round +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret + %1 = call double @llvm.round.f64(double %a) + ret double %1 +} + +declare double @llvm.roundeven.f64(double) + +define double @roundeven_f64(double %a) nounwind { +; RV32IFD-LABEL: roundeven_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call roundeven +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: roundeven_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call roundeven +; RV64IFD-NEXT: ld ra, 
8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret + %1 = call double @llvm.roundeven.f64(double %a) + ret double %1 +} + +declare i1 @llvm.is.fpclass.f64(double, i32) +define i1 @isnan_d_fpclass(double %x) { +; CHECKIFD-LABEL: isnan_d_fpclass: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fclass.d a0, fa0 +; CHECKIFD-NEXT: andi a0, a0, 768 +; CHECKIFD-NEXT: snez a0, a0 +; CHECKIFD-NEXT: ret + %1 = call i1 @llvm.is.fpclass.f64(double %x, i32 3) ; nan + ret i1 %1 +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll new file mode 100644 index 000000000000..39a5beb317ab --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll @@ -0,0 +1,441 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -global-isel -mattr=+f \ +; RUN: -verify-machineinstrs -target-abi=ilp32f \ +; RUN: | FileCheck -check-prefix=RV32IF %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -global-isel -mattr=+d \ +; RUN: -verify-machineinstrs -target-abi=ilp32f \ +; RUN: | FileCheck -check-prefix=RV32IF %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -global-isel -mattr=+f \ +; RUN: -verify-machineinstrs -target-abi=lp64f \ +; RUN: | FileCheck -check-prefix=RV64IF %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -global-isel -mattr=+d \ +; RUN: -verify-machineinstrs -target-abi=lp64d \ +; RUN: | FileCheck -check-prefix=RV64IF %s + +define float @sqrt_f32(float %a) nounwind { +; RV32IF-LABEL: sqrt_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fsqrt.s fa0, fa0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: sqrt_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fsqrt.s fa0, fa0 +; RV64IF-NEXT: ret + %1 = call float @llvm.sqrt.f32(float %a) + ret float %1 +} + +define float @fma_f32(float %a, float %b, float %c) nounwind { +; RV32IF-LABEL: fma_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmadd.s fa0, fa0, 
fa1, fa2 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fma_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmadd.s fa0, fa0, fa1, fa2 +; RV64IF-NEXT: ret + %1 = call float @llvm.fma.f32(float %a, float %b, float %c) + ret float %1 +} + +define float @fmuladd_f32(float %a, float %b, float %c) nounwind { +; RV32IF-LABEL: fmuladd_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmadd.s fa0, fa0, fa1, fa2 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fmuladd_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmadd.s fa0, fa0, fa1, fa2 +; RV64IF-NEXT: ret + %1 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) + ret float %1 +} + +define float @fabs_f32(float %a) nounwind { +; RV32IF-LABEL: fabs_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fabs.s fa0, fa0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fabs_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fabs.s fa0, fa0 +; RV64IF-NEXT: ret + %1 = call float @llvm.fabs.f32(float %a) + ret float %1 +} + +define float @minnum_f32(float %a, float %b) nounwind { +; RV32IF-LABEL: minnum_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmin.s fa0, fa0, fa1 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: minnum_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmin.s fa0, fa0, fa1 +; RV64IF-NEXT: ret + %1 = call float @llvm.minnum.f32(float %a, float %b) + ret float %1 +} + +define float @maxnum_f32(float %a, float %b) nounwind { +; RV32IF-LABEL: maxnum_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmax.s fa0, fa0, fa1 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: maxnum_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmax.s fa0, fa0, fa1 +; RV64IF-NEXT: ret + %1 = call float @llvm.maxnum.f32(float %a, float %b) + ret float %1 +} + +define float @copysign_f32(float %a, float %b) nounwind { +; RV32IF-LABEL: copysign_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fsgnj.s fa0, fa0, fa1 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: copysign_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fsgnj.s fa0, fa0, fa1 +; RV64IF-NEXT: ret + %1 = call float @llvm.copysign.f32(float %a, float %b) + ret float %1 +} + +define float @ceil_f32(float %a) 
nounwind { +; RV32IF-LABEL: ceil_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call ceilf +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: ceil_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call ceilf +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret + %1 = call float @llvm.ceil.f32(float %a) + ret float %1 +} + +define float @trunc_f32(float %a) nounwind { +; RV32IF-LABEL: trunc_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call truncf +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: trunc_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call truncf +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret + %1 = call float @llvm.trunc.f32(float %a) + ret float %1 +} + +define float @rint_f32(float %a) nounwind { +; RV32IF-LABEL: rint_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call rintf +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: rint_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call rintf +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret + %1 = call float @llvm.rint.f32(float %a) + ret float %1 +} + +define float @nearbyint_f32(float %a) nounwind { +; RV32IF-LABEL: nearbyint_f32: +; RV32IF: # %bb.0: +; 
RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call nearbyintf +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: nearbyint_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call nearbyintf +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret + %1 = call float @llvm.nearbyint.f32(float %a) + ret float %1 +} + +define float @round_f32(float %a) nounwind { +; RV32IF-LABEL: round_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call roundf +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: round_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call roundf +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret + %1 = call float @llvm.round.f32(float %a) + ret float %1 +} + +define float @roundeven_f32(float %a) nounwind { +; RV32IF-LABEL: roundeven_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call roundevenf +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: roundeven_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call roundevenf +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret + %1 = call float @llvm.roundeven.f32(float %a) + ret float %1 +} + +define i1 @fpclass(float %x) { +; RV32IF-LABEL: fpclass: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fclass.s a0, fa0 +; RV32IF-NEXT: 
andi a0, a0, 927 +; RV32IF-NEXT: snez a0, a0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fpclass: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fclass.s a0, fa0 +; RV64IF-NEXT: andi a0, a0, 927 +; RV64IF-NEXT: snez a0, a0 +; RV64IF-NEXT: ret + %cmp = call i1 @llvm.is.fpclass.f32(float %x, i32 639) + ret i1 %cmp +} + +define i1 @isnan_fpclass(float %x) { +; RV32IF-LABEL: isnan_fpclass: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fclass.s a0, fa0 +; RV32IF-NEXT: andi a0, a0, 768 +; RV32IF-NEXT: snez a0, a0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: isnan_fpclass: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fclass.s a0, fa0 +; RV64IF-NEXT: andi a0, a0, 768 +; RV64IF-NEXT: snez a0, a0 +; RV64IF-NEXT: ret + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 3) ; nan + ret i1 %1 +} + +define i1 @isqnan_fpclass(float %x) { +; RV32IF-LABEL: isqnan_fpclass: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fclass.s a0, fa0 +; RV32IF-NEXT: andi a0, a0, 512 +; RV32IF-NEXT: snez a0, a0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: isqnan_fpclass: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fclass.s a0, fa0 +; RV64IF-NEXT: andi a0, a0, 512 +; RV64IF-NEXT: snez a0, a0 +; RV64IF-NEXT: ret + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 2) ; qnan + ret i1 %1 +} + +define i1 @issnan_fpclass(float %x) { +; RV32IF-LABEL: issnan_fpclass: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fclass.s a0, fa0 +; RV32IF-NEXT: andi a0, a0, 256 +; RV32IF-NEXT: snez a0, a0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: issnan_fpclass: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fclass.s a0, fa0 +; RV64IF-NEXT: andi a0, a0, 256 +; RV64IF-NEXT: snez a0, a0 +; RV64IF-NEXT: ret + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 1) ; snan + ret i1 %1 +} + +define i1 @isinf_fpclass(float %x) { +; RV32IF-LABEL: isinf_fpclass: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fclass.s a0, fa0 +; RV32IF-NEXT: andi a0, a0, 129 +; RV32IF-NEXT: snez a0, a0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: isinf_fpclass: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fclass.s a0, fa0 +; RV64IF-NEXT: andi a0, a0, 129 +; RV64IF-NEXT: 
snez a0, a0 +; RV64IF-NEXT: ret + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf" + ret i1 %1 +} + +define i1 @isposinf_fpclass(float %x) { +; RV32IF-LABEL: isposinf_fpclass: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fclass.s a0, fa0 +; RV32IF-NEXT: andi a0, a0, 128 +; RV32IF-NEXT: snez a0, a0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: isposinf_fpclass: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fclass.s a0, fa0 +; RV64IF-NEXT: andi a0, a0, 128 +; RV64IF-NEXT: snez a0, a0 +; RV64IF-NEXT: ret + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 512) ; 0x200 = "+inf" + ret i1 %1 +} + +define i1 @isneginf_fpclass(float %x) { +; RV32IF-LABEL: isneginf_fpclass: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fclass.s a0, fa0 +; RV32IF-NEXT: andi a0, a0, 1 +; RV32IF-NEXT: snez a0, a0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: isneginf_fpclass: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fclass.s a0, fa0 +; RV64IF-NEXT: andi a0, a0, 1 +; RV64IF-NEXT: snez a0, a0 +; RV64IF-NEXT: ret + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 4) ; "-inf" + ret i1 %1 +} + +define i1 @isfinite_fpclass(float %x) { +; RV32IF-LABEL: isfinite_fpclass: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fclass.s a0, fa0 +; RV32IF-NEXT: andi a0, a0, 126 +; RV32IF-NEXT: snez a0, a0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: isfinite_fpclass: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fclass.s a0, fa0 +; RV64IF-NEXT: andi a0, a0, 126 +; RV64IF-NEXT: snez a0, a0 +; RV64IF-NEXT: ret + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite" + ret i1 %1 +} + +define i1 @isposfinite_fpclass(float %x) { +; RV32IF-LABEL: isposfinite_fpclass: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fclass.s a0, fa0 +; RV32IF-NEXT: andi a0, a0, 112 +; RV32IF-NEXT: snez a0, a0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: isposfinite_fpclass: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fclass.s a0, fa0 +; RV64IF-NEXT: andi a0, a0, 112 +; RV64IF-NEXT: snez a0, a0 +; RV64IF-NEXT: ret + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 448) ; 0x1c0 = "+finite" + ret i1 %1 
+} + +define i1 @isnegfinite_fpclass(float %x) { +; RV32IF-LABEL: isnegfinite_fpclass: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fclass.s a0, fa0 +; RV32IF-NEXT: andi a0, a0, 14 +; RV32IF-NEXT: snez a0, a0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: isnegfinite_fpclass: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fclass.s a0, fa0 +; RV64IF-NEXT: andi a0, a0, 14 +; RV64IF-NEXT: snez a0, a0 +; RV64IF-NEXT: ret + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 56) ; 0x38 = "-finite" + ret i1 %1 +} + +define i1 @isnotfinite_fpclass(float %x) { +; RV32IF-LABEL: isnotfinite_fpclass: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fclass.s a0, fa0 +; RV32IF-NEXT: andi a0, a0, 897 +; RV32IF-NEXT: snez a0, a0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: isnotfinite_fpclass: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fclass.s a0, fa0 +; RV64IF-NEXT: andi a0, a0, 897 +; RV64IF-NEXT: snez a0, a0 +; RV64IF-NEXT: ret + %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 519) ; 0x207 = "inf|nan" + ret i1 %1 +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-fp-ceil-floor.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-fp-ceil-floor.mir deleted file mode 100644 index 1e184bd0c112..000000000000 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-fp-ceil-floor.mir +++ /dev/null @@ -1,98 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=riscv32 -mattr=+d -run-pass=legalizer %s -o - \ -# RUN: | FileCheck %s -# RUN: llc -mtriple=riscv64 -mattr=+d -run-pass=legalizer %s -o - \ -# RUN: | FileCheck %s - ---- -name: ceil_f32 -body: | - bb.1: - liveins: $f10_f - - ; CHECK-LABEL: name: ceil_f32 - ; CHECK: liveins: $f10_f - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f10_f - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 - ; CHECK-NEXT: $f10_f = COPY [[COPY]](s32) - ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &ceilf, csr_ilp32d_lp64d, implicit-def $x1, implicit $f10_f, implicit-def $f10_f - 
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $f10_f - ; CHECK-NEXT: $f10_f = COPY [[COPY1]](s32) - ; CHECK-NEXT: PseudoRET implicit $f10_f - %0:_(s32) = COPY $f10_f - %1:_(s32) = G_FCEIL %0 - $f10_f = COPY %1(s32) - PseudoRET implicit $f10_f - -... ---- -name: floor_f32 -body: | - bb.1: - liveins: $f10_f - - ; CHECK-LABEL: name: floor_f32 - ; CHECK: liveins: $f10_f - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f10_f - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 - ; CHECK-NEXT: $f10_f = COPY [[COPY]](s32) - ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &floorf, csr_ilp32d_lp64d, implicit-def $x1, implicit $f10_f, implicit-def $f10_f - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $f10_f - ; CHECK-NEXT: $f10_f = COPY [[COPY1]](s32) - ; CHECK-NEXT: PseudoRET implicit $f10_f - %0:_(s32) = COPY $f10_f - %1:_(s32) = G_FFLOOR %0 - $f10_f = COPY %1(s32) - PseudoRET implicit $f10_f - -... ---- -name: ceil_f64 -body: | - bb.1: - liveins: $f10_d - - ; CHECK-LABEL: name: ceil_f64 - ; CHECK: liveins: $f10_d - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $f10_d - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 - ; CHECK-NEXT: $f10_d = COPY [[COPY]](s64) - ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &ceil, csr_ilp32d_lp64d, implicit-def $x1, implicit $f10_d, implicit-def $f10_d - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $f10_d - ; CHECK-NEXT: $f10_d = COPY [[COPY1]](s64) - ; CHECK-NEXT: PseudoRET implicit $f10_d - %0:_(s64) = COPY $f10_d - %1:_(s64) = G_FCEIL %0 - $f10_d = COPY %1(s64) - PseudoRET implicit $f10_d - -... 
---- -name: floor_f64 -body: | - bb.1: - liveins: $f10_d - - ; CHECK-LABEL: name: floor_f64 - ; CHECK: liveins: $f10_d - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $f10_d - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 - ; CHECK-NEXT: $f10_d = COPY [[COPY]](s64) - ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &floor, csr_ilp32d_lp64d, implicit-def $x1, implicit $f10_d, implicit-def $f10_d - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $f10_d - ; CHECK-NEXT: $f10_d = COPY [[COPY1]](s64) - ; CHECK-NEXT: PseudoRET implicit $f10_d - %0:_(s64) = COPY $f10_d - %1:_(s64) = G_FFLOOR %0 - $f10_d = COPY %1(s64) - PseudoRET implicit $f10_d - -... diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv32.mir index f925d2451508..bed44eb657da 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv32.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv32.mir @@ -1,6 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=riscv32 -run-pass=legalizer %s -o - \ -# RUN: | FileCheck %s +# RUN: | FileCheck %s +# RUN: llc -mtriple=riscv32 -mattr=+unaligned-scalar-mem -run-pass=legalizer %s -o - \ +# RUN: | FileCheck %s --check-prefix=UNALIGNED --- name: load_i8 @@ -26,6 +28,14 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CHECK-NEXT: $x10 = COPY [[LOAD]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 + ; + ; UNALIGNED-LABEL: name: load_i8 + ; UNALIGNED: liveins: $x10 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s32) + ; UNALIGNED-NEXT: PseudoRET implicit $x10 %0:_(p0) = COPY $x10 %1:_(s8) = G_LOAD 
%0(p0) :: (load (s8)) %2:_(s32) = G_ANYEXT %1(s8) @@ -57,6 +67,14 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; CHECK-NEXT: $x10 = COPY [[LOAD]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 + ; + ; UNALIGNED-LABEL: name: load_i16 + ; UNALIGNED: liveins: $x10 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) + ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s32) + ; UNALIGNED-NEXT: PseudoRET implicit $x10 %0:_(p0) = COPY $x10 %1:_(s16) = G_LOAD %0(p0) :: (load (s16)) %2:_(s32) = G_ANYEXT %1(s16) @@ -87,6 +105,14 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; CHECK-NEXT: $x10 = COPY [[LOAD]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 + ; + ; UNALIGNED-LABEL: name: load_i32 + ; UNALIGNED: liveins: $x10 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) + ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s32) + ; UNALIGNED-NEXT: PseudoRET implicit $x10 %0:_(p0) = COPY $x10 %1:_(s32) = G_LOAD %0(p0) :: (load (s32)) $x10 = COPY %1(s32) @@ -122,6 +148,18 @@ body: | ; CHECK-NEXT: $x10 = COPY [[LOAD]](s32) ; CHECK-NEXT: $x11 = COPY [[LOAD1]](s32) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 + ; + ; UNALIGNED-LABEL: name: load_i64 + ; UNALIGNED: liveins: $x10 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8) + ; UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) + ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s32) + ; UNALIGNED-NEXT: $x11 = COPY [[LOAD1]](s32) + ; 
UNALIGNED-NEXT: PseudoRET implicit $x10, implicit $x11 %0:_(p0) = COPY $x10 %1:_(s64) = G_LOAD %0(p0) :: (load (s64)) %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %1(s64) @@ -153,6 +191,14 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load (p0), align 8) ; CHECK-NEXT: $x10 = COPY [[LOAD]](p0) ; CHECK-NEXT: PseudoRET implicit $x10 + ; + ; UNALIGNED-LABEL: name: load_ptr + ; UNALIGNED: liveins: $x10 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load (p0), align 8) + ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](p0) + ; UNALIGNED-NEXT: PseudoRET implicit $x10 %0:_(p0) = COPY $x10 %1:_(p0) = G_LOAD %0(p0) :: (load (p0), align 8) $x10 = COPY %1(p0) @@ -189,6 +235,14 @@ body: | ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CHECK-NEXT: $x10 = COPY [[OR]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 + ; + ; UNALIGNED-LABEL: name: load_i16_unaligned + ; UNALIGNED: liveins: $x10 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 1) + ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s32) + ; UNALIGNED-NEXT: PseudoRET implicit $x10 %0:_(p0) = COPY $x10 %1:_(s16) = G_LOAD %0(p0) :: (load (s16), align 1) %2:_(s32) = G_ANYEXT %1(s16) @@ -237,6 +291,14 @@ body: | ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CHECK-NEXT: $x10 = COPY [[OR2]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 + ; + ; UNALIGNED-LABEL: name: load_i32_unaligned + ; UNALIGNED: liveins: $x10 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1) + ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s32) + ; UNALIGNED-NEXT: PseudoRET implicit $x10 %0:_(p0) = COPY $x10 %1:_(s32) = G_LOAD %0(p0) :: (load (s32), align 1) $x10 = COPY %1(s32) @@ -272,6 
+334,14 @@ body: | ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CHECK-NEXT: $x10 = COPY [[OR]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 + ; + ; UNALIGNED-LABEL: name: load_i32_align2 + ; UNALIGNED: liveins: $x10 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 2) + ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s32) + ; UNALIGNED-NEXT: PseudoRET implicit $x10 %0:_(p0) = COPY $x10 %1:_(s32) = G_LOAD %0(p0) :: (load (s32), align 2) $x10 = COPY %1(s32) @@ -343,6 +413,18 @@ body: | ; CHECK-NEXT: $x10 = COPY [[OR2]](s32) ; CHECK-NEXT: $x11 = COPY [[OR5]](s32) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 + ; + ; UNALIGNED-LABEL: name: load_i64_unaligned + ; UNALIGNED: liveins: $x10 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1) + ; UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4, align 1) + ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s32) + ; UNALIGNED-NEXT: $x11 = COPY [[LOAD1]](s32) + ; UNALIGNED-NEXT: PseudoRET implicit $x10, implicit $x11 %0:_(p0) = COPY $x10 %1:_(s64) = G_LOAD %0(p0) :: (load (s64), align 1) %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %1(s64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv64.mir index 933bc589f601..491e4a358b1a 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv64.mir @@ -1,6 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=riscv64 
-run-pass=legalizer %s -o - \ -# RUN: | FileCheck %s +# RUN: | FileCheck %s +# RUN: llc -mtriple=riscv64 -mattr=+unaligned-scalar-mem -run-pass=legalizer %s -o - \ +# RUN: | FileCheck %s --check-prefix=UNALIGNED --- name: load_i8 @@ -27,6 +29,15 @@ body: | ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 + ; + ; UNALIGNED-LABEL: name: load_i8 + ; UNALIGNED: liveins: $x10 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; UNALIGNED-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; UNALIGNED-NEXT: PseudoRET implicit $x10 %0:_(p0) = COPY $x10 %1:_(s8) = G_LOAD %0(p0) :: (load (s8)) %2:_(s64) = G_ANYEXT %1(s8) @@ -59,6 +70,15 @@ body: | ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 + ; + ; UNALIGNED-LABEL: name: load_i16 + ; UNALIGNED: liveins: $x10 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) + ; UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; UNALIGNED-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; UNALIGNED-NEXT: PseudoRET implicit $x10 %0:_(p0) = COPY $x10 %1:_(s16) = G_LOAD %0(p0) :: (load (s16)) %2:_(s64) = G_ANYEXT %1(s16) @@ -91,6 +111,15 @@ body: | ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 + ; + ; UNALIGNED-LABEL: name: load_i32 + ; UNALIGNED: liveins: $x10 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) + ; UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = 
G_ANYEXT [[LOAD]](s32) + ; UNALIGNED-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; UNALIGNED-NEXT: PseudoRET implicit $x10 %0:_(p0) = COPY $x10 %1:_(s32) = G_LOAD %0(p0) :: (load (s32)) %2:_(s64) = G_ANYEXT %1(s32) @@ -121,6 +150,14 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) ; CHECK-NEXT: $x10 = COPY [[LOAD]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 + ; + ; UNALIGNED-LABEL: name: load_i64 + ; UNALIGNED: liveins: $x10 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) + ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s64) + ; UNALIGNED-NEXT: PseudoRET implicit $x10 %0:_(p0) = COPY $x10 %1:_(s64) = G_LOAD %0(p0) :: (load (s64)) $x10 = COPY %1(s64) @@ -156,6 +193,18 @@ body: | ; CHECK-NEXT: $x10 = COPY [[LOAD]](s64) ; CHECK-NEXT: $x11 = COPY [[LOAD1]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 + ; + ; UNALIGNED-LABEL: name: load_i128 + ; UNALIGNED: liveins: $x10 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) + ; UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 8) + ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s64) + ; UNALIGNED-NEXT: $x11 = COPY [[LOAD1]](s64) + ; UNALIGNED-NEXT: PseudoRET implicit $x10, implicit $x11 %0:_(p0) = COPY $x10 %1:_(s128) = G_LOAD %0(p0) :: (load (s128), align 8) %2:_(s64), %3:_(s64) = G_UNMERGE_VALUES %1(s128) @@ -187,6 +236,14 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load (p0)) ; CHECK-NEXT: $x10 = COPY [[LOAD]](p0) ; CHECK-NEXT: PseudoRET implicit $x10 + ; + ; UNALIGNED-LABEL: name: load_ptr + ; UNALIGNED: liveins: $x10 + ; UNALIGNED-NEXT: {{ $}} + ; 
UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load (p0)) + ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](p0) + ; UNALIGNED-NEXT: PseudoRET implicit $x10 %0:_(p0) = COPY $x10 %1:_(p0) = G_LOAD %0(p0) :: (load (p0)) $x10 = COPY %1(p0) @@ -224,6 +281,15 @@ body: | ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 + ; + ; UNALIGNED-LABEL: name: load_i16_unaligned + ; UNALIGNED: liveins: $x10 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 1) + ; UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; UNALIGNED-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; UNALIGNED-NEXT: PseudoRET implicit $x10 %0:_(p0) = COPY $x10 %1:_(s16) = G_LOAD %0(p0) :: (load (s16), align 1) %2:_(s64) = G_ANYEXT %1(s16) @@ -274,6 +340,15 @@ body: | ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR2]](s32) ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 + ; + ; UNALIGNED-LABEL: name: load_i32_unaligned + ; UNALIGNED: liveins: $x10 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1) + ; UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; UNALIGNED-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; UNALIGNED-NEXT: PseudoRET implicit $x10 %0:_(p0) = COPY $x10 %1:_(s32) = G_LOAD %0(p0) :: (load (s32), align 1) %2:_(s64) = G_ANYEXT %1(s32) @@ -312,6 +387,15 @@ body: | ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) ; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 + ; + ; UNALIGNED-LABEL: name: load_i32_align2 + ; UNALIGNED: liveins: $x10 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = 
COPY $x10 + ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 2) + ; UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; UNALIGNED-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; UNALIGNED-NEXT: PseudoRET implicit $x10 %0:_(p0) = COPY $x10 %1:_(s32) = G_LOAD %0(p0) :: (load (s32), align 2) %2:_(s64) = G_ANYEXT %1(s32) @@ -384,6 +468,14 @@ body: | ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[OR2]] ; CHECK-NEXT: $x10 = COPY [[OR6]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 + ; + ; UNALIGNED-LABEL: name: load_i64_unaligned + ; UNALIGNED: liveins: $x10 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 1) + ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s64) + ; UNALIGNED-NEXT: PseudoRET implicit $x10 %0:_(p0) = COPY $x10 %1:_(s64) = G_LOAD %0(p0) :: (load (s64), align 1) $x10 = COPY %1(s64) @@ -431,6 +523,14 @@ body: | ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[OR]] ; CHECK-NEXT: $x10 = COPY [[OR2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 + ; + ; UNALIGNED-LABEL: name: load_i64_align2 + ; UNALIGNED: liveins: $x10 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 2) + ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s64) + ; UNALIGNED-NEXT: PseudoRET implicit $x10 %0:_(p0) = COPY $x10 %1:_(s64) = G_LOAD %0(p0) :: (load (s64), align 2) $x10 = COPY %1(s64) @@ -550,6 +650,18 @@ body: | ; CHECK-NEXT: $x10 = COPY [[OR6]](s64) ; CHECK-NEXT: $x11 = COPY [[OR13]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 + ; + ; UNALIGNED-LABEL: name: load_i128_unaligned + ; UNALIGNED: liveins: $x10 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 1) + ; UNALIGNED-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 8, align 1) + ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s64) + ; UNALIGNED-NEXT: $x11 = COPY [[LOAD1]](s64) + ; UNALIGNED-NEXT: PseudoRET implicit $x10, implicit $x11 %0:_(p0) = COPY $x10 %1:_(s128) = G_LOAD %0(p0) :: (load (s128), align 1) %2:_(s64), %3:_(s64) = G_UNMERGE_VALUES %1(s128) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv32.mir index 2ece5a8c9d41..791bdb30c490 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv32.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv32.mir @@ -1,6 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=riscv32 -run-pass=legalizer %s -o - \ -# RUN: | FileCheck %s +# RUN: | FileCheck %s +# RUN: llc -mtriple=riscv32 -mattr=+unaligned-scalar-mem -run-pass=legalizer %s -o - \ +# RUN: | FileCheck %s --check-prefix=UNALIGNED --- name: store_i8 @@ -26,6 +28,14 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 ; CHECK-NEXT: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s8)) ; CHECK-NEXT: PseudoRET + ; + ; UNALIGNED-LABEL: name: store_i8 + ; UNALIGNED: liveins: $x10, $x11 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 + ; UNALIGNED-NEXT: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s8)) + ; UNALIGNED-NEXT: PseudoRET %2:_(s32) = COPY $x10 %0:_(s8) = G_TRUNC %2(s32) %1:_(p0) = COPY $x11 @@ -57,6 +67,14 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 ; CHECK-NEXT: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s16)) ; CHECK-NEXT: PseudoRET + ; + ; UNALIGNED-LABEL: name: store_i16 + ; UNALIGNED: liveins: 
$x10, $x11 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 + ; UNALIGNED-NEXT: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s16)) + ; UNALIGNED-NEXT: PseudoRET %2:_(s32) = COPY $x10 %0:_(s16) = G_TRUNC %2(s32) %1:_(p0) = COPY $x11 @@ -87,6 +105,14 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 ; CHECK-NEXT: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s32)) ; CHECK-NEXT: PseudoRET + ; + ; UNALIGNED-LABEL: name: store_i32 + ; UNALIGNED: liveins: $x10, $x11 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 + ; UNALIGNED-NEXT: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s32)) + ; UNALIGNED-NEXT: PseudoRET %0:_(s32) = COPY $x10 %1:_(p0) = COPY $x11 G_STORE %0(s32), %1(p0) :: (store (s32)) @@ -122,6 +148,18 @@ body: | ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s32) ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4) ; CHECK-NEXT: PseudoRET + ; + ; UNALIGNED-LABEL: name: store_i64 + ; UNALIGNED: liveins: $x10, $x11, $x12 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x12 + ; UNALIGNED-NEXT: G_STORE [[COPY]](s32), [[COPY2]](p0) :: (store (s32), align 8) + ; UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s32) + ; UNALIGNED-NEXT: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4) + ; UNALIGNED-NEXT: PseudoRET %2:_(s32) = COPY $x10 %3:_(s32) = COPY $x11 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -153,6 +191,14 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 ; CHECK-NEXT: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store (p0), align 8) ; 
CHECK-NEXT: PseudoRET + ; + ; UNALIGNED-LABEL: name: store_ptr + ; UNALIGNED: liveins: $x10, $x11 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 + ; UNALIGNED-NEXT: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store (p0), align 8) + ; UNALIGNED-NEXT: PseudoRET %0:_(p0) = COPY $x10 %1:_(p0) = COPY $x11 G_STORE %0(p0), %1(p0) :: (store (p0), align 8) @@ -190,6 +236,14 @@ body: | ; CHECK-NEXT: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s8)) ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s8) into unknown-address + 1) ; CHECK-NEXT: PseudoRET + ; + ; UNALIGNED-LABEL: name: store_i16_unaligned + ; UNALIGNED: liveins: $x10, $x11 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 + ; UNALIGNED-NEXT: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s16), align 1) + ; UNALIGNED-NEXT: PseudoRET %2:_(s32) = COPY $x10 %0:_(s16) = G_TRUNC %2(s32) %1:_(p0) = COPY $x11 @@ -238,6 +292,14 @@ body: | ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s8) into unknown-address + 2) ; CHECK-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p0) :: (store (s8) into unknown-address + 3) ; CHECK-NEXT: PseudoRET + ; + ; UNALIGNED-LABEL: name: store_i32_unaligned + ; UNALIGNED: liveins: $x10, $x11 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 + ; UNALIGNED-NEXT: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s32), align 1) + ; UNALIGNED-NEXT: PseudoRET %0:_(s32) = COPY $x10 %1:_(p0) = COPY $x11 G_STORE %0(s32), %1(p0) :: (store (s32), align 1) @@ -273,6 +335,14 @@ body: | ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store (s16)) ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s16) into unknown-address + 2) ; CHECK-NEXT: PseudoRET + ; + ; UNALIGNED-LABEL: name: store_i32_align2 
+ ; UNALIGNED: liveins: $x10, $x11 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 + ; UNALIGNED-NEXT: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s32), align 2) + ; UNALIGNED-NEXT: PseudoRET %0:_(s32) = COPY $x10 %1:_(p0) = COPY $x11 G_STORE %0(s32), %1(p0) :: (store (s32), align 2) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv64.mir index 85055561c4f9..860bc932d856 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv64.mir @@ -1,6 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=riscv64 -run-pass=legalizer %s -o - \ -# RUN: | FileCheck %s +# RUN: | FileCheck %s +# RUN: llc -mtriple=riscv64 -mattr=+unaligned-scalar-mem -run-pass=legalizer %s -o - \ +# RUN: | FileCheck %s --check-prefix=UNALIGNED --- name: store_i8 @@ -27,6 +29,15 @@ body: | ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s8)) ; CHECK-NEXT: PseudoRET + ; + ; UNALIGNED-LABEL: name: store_i8 + ; UNALIGNED: liveins: $x10, $x11 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 + ; UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; UNALIGNED-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s8)) + ; UNALIGNED-NEXT: PseudoRET %2:_(s64) = COPY $x10 %0:_(s8) = G_TRUNC %2(s64) %1:_(p0) = COPY $x11 @@ -59,6 +70,15 @@ body: | ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s16)) ; CHECK-NEXT: PseudoRET + ; + ; UNALIGNED-LABEL: name: store_i16 + ; UNALIGNED: liveins: $x10, $x11 + ; UNALIGNED-NEXT: {{ 
$}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 + ; UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; UNALIGNED-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s16)) + ; UNALIGNED-NEXT: PseudoRET %2:_(s64) = COPY $x10 %0:_(s16) = G_TRUNC %2(s64) %1:_(p0) = COPY $x11 @@ -91,6 +111,15 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 ; CHECK-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s32)) ; CHECK-NEXT: PseudoRET + ; + ; UNALIGNED-LABEL: name: store_i32 + ; UNALIGNED: liveins: $x10, $x11 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 + ; UNALIGNED-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s32)) + ; UNALIGNED-NEXT: PseudoRET %2:_(s64) = COPY $x10 %0:_(s32) = G_TRUNC %2(s64) %1:_(p0) = COPY $x11 @@ -121,6 +150,14 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 ; CHECK-NEXT: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64)) ; CHECK-NEXT: PseudoRET + ; + ; UNALIGNED-LABEL: name: store_i64 + ; UNALIGNED: liveins: $x10, $x11 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 + ; UNALIGNED-NEXT: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64)) + ; UNALIGNED-NEXT: PseudoRET %0:_(s64) = COPY $x10 %1:_(p0) = COPY $x11 G_STORE %0(s64), %1(p0) :: (store (s64)) @@ -150,6 +187,14 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 ; CHECK-NEXT: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64)) ; CHECK-NEXT: PseudoRET + ; + ; UNALIGNED-LABEL: name: store_i128 + ; UNALIGNED: liveins: $x10, $x11 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 + ; UNALIGNED-NEXT: G_STORE [[COPY]](s64), 
[[COPY1]](p0) :: (store (s64)) + ; UNALIGNED-NEXT: PseudoRET %0:_(s64) = COPY $x10 %1:_(p0) = COPY $x11 G_STORE %0(s64), %1(p0) :: (store (s64)) @@ -179,6 +224,14 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 ; CHECK-NEXT: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store (p0)) ; CHECK-NEXT: PseudoRET + ; + ; UNALIGNED-LABEL: name: store_ptr + ; UNALIGNED: liveins: $x10, $x11 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 + ; UNALIGNED-NEXT: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store (p0)) + ; UNALIGNED-NEXT: PseudoRET %0:_(p0) = COPY $x10 %1:_(p0) = COPY $x11 G_STORE %0(p0), %1(p0) :: (store (p0)) @@ -217,6 +270,15 @@ body: | ; CHECK-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s8)) ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s8) into unknown-address + 1) ; CHECK-NEXT: PseudoRET + ; + ; UNALIGNED-LABEL: name: store_i16_unaligned + ; UNALIGNED: liveins: $x10, $x11 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 + ; UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; UNALIGNED-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s16), align 1) + ; UNALIGNED-NEXT: PseudoRET %2:_(s64) = COPY $x10 %0:_(s16) = G_TRUNC %2(s64) %1:_(p0) = COPY $x11 @@ -267,6 +329,15 @@ body: | ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s8) into unknown-address + 2) ; CHECK-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p0) :: (store (s8) into unknown-address + 3) ; CHECK-NEXT: PseudoRET + ; + ; UNALIGNED-LABEL: name: store_i32_unaligned + ; UNALIGNED: liveins: $x10, $x11 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 + ; UNALIGNED-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) 
:: (store (s32), align 1) + ; UNALIGNED-NEXT: PseudoRET %2:_(s64) = COPY $x10 %0:_(s32) = G_TRUNC %2(s64) %1:_(p0) = COPY $x11 @@ -305,6 +376,15 @@ body: | ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store (s16)) ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s16) into unknown-address + 2) ; CHECK-NEXT: PseudoRET + ; + ; UNALIGNED-LABEL: name: store_i32_align2 + ; UNALIGNED: liveins: $x10, $x11 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 + ; UNALIGNED-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s32), align 2) + ; UNALIGNED-NEXT: PseudoRET %2:_(s64) = COPY $x10 %0:_(s32) = G_TRUNC %2(s64) %1:_(p0) = COPY $x11 @@ -353,6 +433,14 @@ body: | ; CHECK-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p0) :: (store (s16) into unknown-address + 4) ; CHECK-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p0) :: (store (s16) into unknown-address + 6) ; CHECK-NEXT: PseudoRET + ; + ; UNALIGNED-LABEL: name: store_i64_align2 + ; UNALIGNED: liveins: $x10, $x11 + ; UNALIGNED-NEXT: {{ $}} + ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 + ; UNALIGNED-NEXT: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64), align 2) + ; UNALIGNED-NEXT: PseudoRET %0:_(s64) = COPY $x10 %1:_(p0) = COPY $x11 G_STORE %0(s64), %1(p0) :: (store (s64), align 2) diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll index d613e4ee0bc2..15cff650765e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll @@ -1,22 +1,428 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 
-mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s \ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ ; RUN: --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ ; RUN: --check-prefixes=CHECK,ZVFHMIN +declare <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x bfloat> @vp_ceil_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ceil_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v9, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, 
v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vp_ceil_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ceil_vv_nxv1bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +declare <vscale x 2 x bfloat> @llvm.vp.ceil.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x bfloat> @vp_ceil_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ceil_vv_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v9, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: 
vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.ceil.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vp_ceil_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ceil_vv_nxv2bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.ceil.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +declare <vscale x 4 x bfloat> @llvm.vp.ceil.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x bfloat> @vp_ceil_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ceil_vv_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v12, v10, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmflt.vf v9, 
v12, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.ceil.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vp_ceil_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ceil_vv_nxv4bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v8, v10 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.ceil.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +declare <vscale x 8 x bfloat> @llvm.vp.ceil.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x bfloat> @vp_ceil_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ceil_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v16, 
v12, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmflt.vf v10, v16, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.ceil.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vp_ceil_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ceil_vv_nxv8bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v8, v12 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.ceil.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +declare <vscale x 16 x bfloat> @llvm.vp.ceil.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x bfloat> @vp_ceil_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ceil_vv_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v0 +; 
CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.ceil.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vp_ceil_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ceil_vv_nxv16bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v16 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.ceil.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +declare <vscale x 32 x bfloat> @llvm.vp.ceil.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x bfloat> 
@vp_ceil_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_ceil_vv_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v17, v0, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t +; CHECK-NEXT: lui a2, 307200 +; CHECK-NEXT: fmv.w.x fa5, a2 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t +; CHECK-NEXT: fsrmi a2, 3 +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t +; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: bltu a0, a1, .LBB10_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0 +; CHECK-NEXT: vmv1r.v v8, v16 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: 
vfabs.v v16, v24, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.ceil.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vp_ceil_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_ceil_vv_nxv32bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v16, v16, a2 
+; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t +; CHECK-NEXT: lui a2, 307200 +; CHECK-NEXT: fmv.w.x fa5, a2 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v16, v8, fa5, v0.t +; CHECK-NEXT: fsrmi a2, 3 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t +; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: bltu a0, a1, .LBB11_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.ceil.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} declare <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32) define <vscale x 1 x half> @vp_ceil_vv_nxv1f16(<vscale x 1 x 
half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI12_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -57,8 +463,8 @@ define <vscale x 1 x half> @vp_ceil_vv_nxv1f16(<vscale x 1 x half> %va, <vscale define <vscale x 1 x half> @vp_ceil_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv1f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI13_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 @@ -97,8 +503,8 @@ declare <vscale x 2 x half> @llvm.vp.ceil.nxv2f16(<vscale x 2 x half>, <vscale x define <vscale x 2 x half> @vp_ceil_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI14_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu @@ -139,8 +545,8 @@ define <vscale x 2 x half> @vp_ceil_vv_nxv2f16(<vscale x 2 x half> %va, <vscale define <vscale x 2 x half> @vp_ceil_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI15_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 
@@ -179,8 +585,8 @@ declare <vscale x 4 x half> @llvm.vp.ceil.nxv4f16(<vscale x 4 x half>, <vscale x define <vscale x 4 x half> @vp_ceil_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI16_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -223,8 +629,8 @@ define <vscale x 4 x half> @vp_ceil_vv_nxv4f16(<vscale x 4 x half> %va, <vscale define <vscale x 4 x half> @vp_ceil_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI17_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 @@ -263,8 +669,8 @@ declare <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half>, <vscale x define <vscale x 8 x half> @vp_ceil_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI18_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1) ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t @@ -309,8 +715,8 @@ define <vscale x 8 x half> @vp_ceil_vv_nxv8f16(<vscale x 8 x half> %va, <vscale define <vscale x 8 x half> @vp_ceil_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) +; ZVFH-NEXT: 
flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 @@ -349,8 +755,8 @@ declare <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half>, <vscal define <vscale x 16 x half> @vp_ceil_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI8_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI20_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1) ; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t @@ -395,8 +801,8 @@ define <vscale x 16 x half> @vp_ceil_vv_nxv16f16(<vscale x 16 x half> %va, <vsca define <vscale x 16 x half> @vp_ceil_vv_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI9_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI21_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 @@ -435,8 +841,8 @@ declare <vscale x 32 x half> @llvm.vp.ceil.nxv32f16(<vscale x 32 x half>, <vscal define <vscale x 32 x half> @vp_ceil_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI10_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI22_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1) ; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v24, v8, v0.t @@ -491,10 +897,10 @@ define <vscale x 32 x half> @vp_ceil_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 -; 
ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB10_2: +; ZVFHMIN-NEXT: .LBB22_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 @@ -533,8 +939,8 @@ define <vscale x 32 x half> @vp_ceil_vv_nxv32f16(<vscale x 32 x half> %va, <vsca define <vscale x 32 x half> @vp_ceil_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI11_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5 @@ -586,10 +992,10 @@ define <vscale x 32 x half> @vp_ceil_vv_nxv32f16_unmasked(<vscale x 32 x half> % ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB11_2: +; ZVFHMIN-NEXT: .LBB23_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 @@ -834,8 +1240,8 @@ declare <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double>, <vsca define <vscale x 1 x double> @vp_ceil_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI22_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI34_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -855,8 +1261,8 @@ define <vscale 
x 1 x double> @vp_ceil_vv_nxv1f64(<vscale x 1 x double> %va, <vsc define <vscale x 1 x double> @vp_ceil_vv_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv1f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI23_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI35_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -876,8 +1282,8 @@ declare <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double>, <vsca define <vscale x 2 x double> @vp_ceil_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI24_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI36_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1) ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t @@ -899,8 +1305,8 @@ define <vscale x 2 x double> @vp_ceil_vv_nxv2f64(<vscale x 2 x double> %va, <vsc define <vscale x 2 x double> @vp_ceil_vv_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv2f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI25_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI37_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -920,8 +1326,8 @@ declare <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double>, <vsca define <vscale x 4 x double> @vp_ceil_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI38_0) +; 
CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1) ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t @@ -943,8 +1349,8 @@ define <vscale x 4 x double> @vp_ceil_vv_nxv4f64(<vscale x 4 x double> %va, <vsc define <vscale x 4 x double> @vp_ceil_vv_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv4f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI27_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI39_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -964,8 +1370,8 @@ declare <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double>, <vsca define <vscale x 7 x double> @vp_ceil_vv_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI28_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI40_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1) ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -987,8 +1393,8 @@ define <vscale x 7 x double> @vp_ceil_vv_nxv7f64(<vscale x 7 x double> %va, <vsc define <vscale x 7 x double> @vp_ceil_vv_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv7f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI29_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI41_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -1008,8 +1414,8 @@ declare <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double>, <vsca define <vscale x 8 x double> @vp_ceil_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 
zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI30_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI42_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1) ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -1031,8 +1437,8 @@ define <vscale x 8 x double> @vp_ceil_vv_nxv8f64(<vscale x 8 x double> %va, <vsc define <vscale x 8 x double> @vp_ceil_vv_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI31_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI43_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -1065,8 +1471,8 @@ define <vscale x 16 x double> @vp_ceil_vv_nxv16f64(<vscale x 16 x double> %va, < ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v6, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: lui a3, %hi(.LCPI32_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) +; CHECK-NEXT: lui a3, %hi(.LCPI44_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 @@ -1087,10 +1493,10 @@ define <vscale x 16 x double> @vp_ceil_vv_nxv16f64(<vscale x 16 x double> %va, < ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: .LBB44_2: ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -1118,8 +1524,8 @@ define <vscale x 16 x double> @vp_ceil_vv_nxv16f64_unmasked(<vscale x 16 x doubl ; CHECK: # 
%bb.0: ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: lui a3, %hi(.LCPI33_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI33_0)(a3) +; CHECK-NEXT: lui a3, %hi(.LCPI45_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 @@ -1132,10 +1538,10 @@ define <vscale x 16 x double> @vp_ceil_vv_nxv16f64_unmasked(<vscale x 16 x doubl ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-NEXT: bltu a0, a1, .LBB45_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll index 9efc3183f15a..ee16b476dc84 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll @@ -1,124 +1,408 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc 
-mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN -define <vscale x 1 x half> @ceil_nxv1f16(<vscale x 1 x half> %x) { -; CHECK-LABEL: ceil_nxv1f16: +define <vscale x 1 x bfloat> @ceil_nxv1bf16(<vscale x 1 x bfloat> %x) { +; CHECK-LABEL: ceil_nxv1bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret - %a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x) - ret <vscale x 1 x half> %a + %a = call <vscale x 1 x bfloat> @llvm.ceil.nxv1bf16(<vscale x 1 x bfloat> %x) + ret <vscale x 1 x bfloat> %a } -declare <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half>) -define <vscale x 2 x half> @ceil_nxv2f16(<vscale x 2 x half> %x) { -; CHECK-LABEL: ceil_nxv2f16: +define <vscale x 2 x bfloat> @ceil_nxv2bf16(<vscale x 2 x bfloat> %x) { +; CHECK-LABEL: ceil_nxv2bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: 
vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret - %a = call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> %x) - ret <vscale x 2 x half> %a + %a = call <vscale x 2 x bfloat> @llvm.ceil.nxv2bf16(<vscale x 2 x bfloat> %x) + ret <vscale x 2 x bfloat> %a } -declare <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half>) -define <vscale x 4 x half> @ceil_nxv4f16(<vscale x 4 x half> %x) { -; CHECK-LABEL: ceil_nxv4f16: +define <vscale x 4 x bfloat> @ceil_nxv4bf16(<vscale x 4 x bfloat> %x) { +; CHECK-LABEL: ceil_nxv4bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v8, v10 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, 
e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 ; CHECK-NEXT: ret - %a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x) - ret <vscale x 4 x half> %a + %a = call <vscale x 4 x bfloat> @llvm.ceil.nxv4bf16(<vscale x 4 x bfloat> %x) + ret <vscale x 4 x bfloat> %a } -declare <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half>) -define <vscale x 8 x half> @ceil_nxv8f16(<vscale x 8 x half> %x) { -; CHECK-LABEL: ceil_nxv8f16: +define <vscale x 8 x bfloat> @ceil_nxv8bf16(<vscale x 8 x bfloat> %x) { +; CHECK-LABEL: ceil_nxv8bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v8, v12 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret - %a = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> %x) - ret <vscale x 8 x half> %a + %a = call <vscale x 8 x bfloat> @llvm.ceil.nxv8bf16(<vscale x 8 x bfloat> %x) + ret <vscale x 8 x bfloat> %a } -declare <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half>) -define <vscale x 16 x half> @ceil_nxv16f16(<vscale x 16 x half> %x) { -; CHECK-LABEL: ceil_nxv16f16: +define 
<vscale x 16 x bfloat> @ceil_nxv16bf16(<vscale x 16 x bfloat> %x) { +; CHECK-LABEL: ceil_nxv16bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v16 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 ; CHECK-NEXT: ret - %a = call <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half> %x) - ret <vscale x 16 x half> %a + %a = call <vscale x 16 x bfloat> @llvm.ceil.nxv16bf16(<vscale x 16 x bfloat> %x) + ret <vscale x 16 x bfloat> %a } -declare <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half>) -define <vscale x 32 x half> @ceil_nxv32f16(<vscale x 32 x half> %x) { -; CHECK-LABEL: ceil_nxv32f16: +define <vscale x 32 x bfloat> @ceil_nxv32bf16(<vscale x 32 x bfloat> %x) { +; CHECK-LABEL: ceil_nxv32bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; 
CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 ; CHECK-NEXT: ret + %a = call <vscale x 32 x bfloat> @llvm.ceil.nxv32bf16(<vscale x 32 x bfloat> %x) + ret <vscale x 32 x bfloat> %a +} + +define <vscale x 1 x half> @ceil_nxv1f16(<vscale x 1 x half> %x) { +; ZVFH-LABEL: ceil_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: ceil_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; 
ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x) + ret <vscale x 1 x half> %a +} +declare <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half>) + +define <vscale x 2 x half> @ceil_nxv2f16(<vscale x 2 x half> %x) { +; ZVFH-LABEL: ceil_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: ceil_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> %x) + ret <vscale x 2 x half> %a +} +declare <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half>) + +define <vscale x 4 x half> 
@ceil_nxv4f16(<vscale x 4 x half> %x) { +; ZVFH-LABEL: ceil_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: ceil_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v10 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x) + ret <vscale x 4 x half> %a +} +declare <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half>) + +define <vscale x 8 x half> @ceil_nxv8f16(<vscale x 8 x half> %x) { +; ZVFH-LABEL: ceil_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: ceil_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli 
a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v12 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> %x) + ret <vscale x 8 x half> %a +} +declare <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half>) + +define <vscale x 16 x half> @ceil_nxv16f16(<vscale x 16 x half> %x) { +; ZVFH-LABEL: ceil_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: ceil_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v16 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; 
ZVFHMIN-NEXT: ret + %a = call <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half> %x) + ret <vscale x 16 x half> %a +} +declare <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half>) + +define <vscale x 32 x half> @ceil_nxv32f16(<vscale x 32 x half> %x) { +; ZVFH-LABEL: ceil_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: ceil_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v16 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v16 +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: ret %a = call <vscale x 32 x half> 
@llvm.ceil.nxv32f16(<vscale x 32 x half> %x) ret <vscale x 32 x half> %a } @@ -227,8 +511,8 @@ declare <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float>) define <vscale x 1 x double> @ceil_nxv1f64(<vscale x 1 x double> %x) { ; CHECK-LABEL: ceil_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -247,8 +531,8 @@ declare <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double>) define <vscale x 2 x double> @ceil_nxv2f64(<vscale x 2 x double> %x) { ; CHECK-LABEL: ceil_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -267,8 +551,8 @@ declare <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double>) define <vscale x 4 x double> @ceil_nxv4f64(<vscale x 4 x double> %x) { ; CHECK-LABEL: ceil_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -287,8 +571,8 @@ declare <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double>) define <vscale x 8 x double> @ceil_nxv8f64(<vscale x 8 x double> %x) { ; CHECK-LABEL: ceil_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI20_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: 
vmflt.vf v0, v16, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll index ec60b3ed3e0c..00e21ce8992b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll @@ -1,124 +1,414 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN -define <vscale x 1 x half> @floor_nxv1f16(<vscale x 1 x half> %x) { -; CHECK-LABEL: floor_nxv1f16: +define <vscale x 1 x bfloat> @floor_nxv1bf16(<vscale x 1 x bfloat> %x) { +; CHECK-LABEL: floor_nxv1bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; 
CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret - %a = call <vscale x 1 x half> @llvm.floor.nxv1f16(<vscale x 1 x half> %x) - ret <vscale x 1 x half> %a + %a = call <vscale x 1 x bfloat> @llvm.floor.nxv1bf16(<vscale x 1 x bfloat> %x) + ret <vscale x 1 x bfloat> %a } -declare <vscale x 1 x half> @llvm.floor.nxv1f16(<vscale x 1 x half>) +declare <vscale x 1 x bfloat> @llvm.floor.nxv1bf16(<vscale x 1 x bfloat>) -define <vscale x 2 x half> @floor_nxv2f16(<vscale x 2 x half> %x) { -; CHECK-LABEL: floor_nxv2f16: +define <vscale x 2 x bfloat> @floor_nxv2bf16(<vscale x 2 x bfloat> %x) { +; CHECK-LABEL: floor_nxv2bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; 
CHECK-NEXT: ret - %a = call <vscale x 2 x half> @llvm.floor.nxv2f16(<vscale x 2 x half> %x) - ret <vscale x 2 x half> %a + %a = call <vscale x 2 x bfloat> @llvm.floor.nxv2bf16(<vscale x 2 x bfloat> %x) + ret <vscale x 2 x bfloat> %a } -declare <vscale x 2 x half> @llvm.floor.nxv2f16(<vscale x 2 x half>) +declare <vscale x 2 x bfloat> @llvm.floor.nxv2bf16(<vscale x 2 x bfloat>) -define <vscale x 4 x half> @floor_nxv4f16(<vscale x 4 x half> %x) { -; CHECK-LABEL: floor_nxv4f16: +define <vscale x 4 x bfloat> @floor_nxv4bf16(<vscale x 4 x bfloat> %x) { +; CHECK-LABEL: floor_nxv4bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v8, v10 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 ; CHECK-NEXT: ret - %a = call <vscale x 4 x half> @llvm.floor.nxv4f16(<vscale x 4 x half> %x) - ret <vscale x 4 x half> %a + %a = call <vscale x 4 x bfloat> @llvm.floor.nxv4bf16(<vscale x 4 x bfloat> %x) + ret <vscale x 4 x bfloat> %a } -declare <vscale x 4 x half> @llvm.floor.nxv4f16(<vscale x 4 x half>) +declare <vscale x 4 x bfloat> @llvm.floor.nxv4bf16(<vscale x 4 x bfloat>) -define <vscale x 8 x half> @floor_nxv8f16(<vscale x 8 x half> %x) { -; CHECK-LABEL: floor_nxv8f16: +define 
<vscale x 8 x bfloat> @floor_nxv8bf16(<vscale x 8 x bfloat> %x) { +; CHECK-LABEL: floor_nxv8bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v8, v12 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret - %a = call <vscale x 8 x half> @llvm.floor.nxv8f16(<vscale x 8 x half> %x) - ret <vscale x 8 x half> %a + %a = call <vscale x 8 x bfloat> @llvm.floor.nxv8bf16(<vscale x 8 x bfloat> %x) + ret <vscale x 8 x bfloat> %a } -declare <vscale x 8 x half> @llvm.floor.nxv8f16(<vscale x 8 x half>) +declare <vscale x 8 x bfloat> @llvm.floor.nxv8bf16(<vscale x 8 x bfloat>) -define <vscale x 16 x half> @floor_nxv16f16(<vscale x 16 x half> %x) { -; CHECK-LABEL: floor_nxv16f16: +define <vscale x 16 x bfloat> @floor_nxv16bf16(<vscale x 16 x bfloat> %x) { +; CHECK-LABEL: floor_nxv16bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v16 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: 
fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 ; CHECK-NEXT: ret - %a = call <vscale x 16 x half> @llvm.floor.nxv16f16(<vscale x 16 x half> %x) - ret <vscale x 16 x half> %a + %a = call <vscale x 16 x bfloat> @llvm.floor.nxv16bf16(<vscale x 16 x bfloat> %x) + ret <vscale x 16 x bfloat> %a } -declare <vscale x 16 x half> @llvm.floor.nxv16f16(<vscale x 16 x half>) +declare <vscale x 16 x bfloat> @llvm.floor.nxv16bf16(<vscale x 16 x bfloat>) -define <vscale x 32 x half> @floor_nxv32f16(<vscale x 32 x half> %x) { -; CHECK-LABEL: floor_nxv32f16: +define <vscale x 32 x bfloat> @floor_nxv32bf16(<vscale x 32 x bfloat> %x) { +; CHECK-LABEL: floor_nxv32bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu 
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 ; CHECK-NEXT: ret + %a = call <vscale x 32 x bfloat> @llvm.floor.nxv32bf16(<vscale x 32 x bfloat> %x) + ret <vscale x 32 x bfloat> %a +} +declare <vscale x 32 x bfloat> @llvm.floor.nxv32bf16(<vscale x 32 x bfloat>) + +define <vscale x 1 x half> @floor_nxv1f16(<vscale x 1 x half> %x) { +; ZVFH-LABEL: floor_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: floor_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, 
ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 1 x half> @llvm.floor.nxv1f16(<vscale x 1 x half> %x) + ret <vscale x 1 x half> %a +} +declare <vscale x 1 x half> @llvm.floor.nxv1f16(<vscale x 1 x half>) + +define <vscale x 2 x half> @floor_nxv2f16(<vscale x 2 x half> %x) { +; ZVFH-LABEL: floor_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: floor_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 2 x half> @llvm.floor.nxv2f16(<vscale x 2 x half> %x) + ret <vscale x 2 x half> %a +} +declare <vscale x 2 x half> @llvm.floor.nxv2f16(<vscale x 2 x half>) + +define <vscale x 4 x half> @floor_nxv4f16(<vscale x 4 x half> %x) { +; ZVFH-LABEL: floor_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: 
vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: floor_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v10 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 4 x half> @llvm.floor.nxv4f16(<vscale x 4 x half> %x) + ret <vscale x 4 x half> %a +} +declare <vscale x 4 x half> @llvm.floor.nxv4f16(<vscale x 4 x half>) + +define <vscale x 8 x half> @floor_nxv8f16(<vscale x 8 x half> %x) { +; ZVFH-LABEL: floor_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: floor_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v12 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v 
v8, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 8 x half> @llvm.floor.nxv8f16(<vscale x 8 x half> %x) + ret <vscale x 8 x half> %a +} +declare <vscale x 8 x half> @llvm.floor.nxv8f16(<vscale x 8 x half>) + +define <vscale x 16 x half> @floor_nxv16f16(<vscale x 16 x half> %x) { +; ZVFH-LABEL: floor_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: floor_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v16 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 16 x half> @llvm.floor.nxv16f16(<vscale x 16 x half> %x) + ret <vscale x 16 x half> %a +} +declare <vscale x 16 x half> @llvm.floor.nxv16f16(<vscale x 16 x half>) + +define <vscale x 32 x half> @floor_nxv32f16(<vscale x 32 x half> %x) { +; ZVFH-LABEL: 
floor_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: floor_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v16 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v16 +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: ret %a = call <vscale x 32 x half> @llvm.floor.nxv32f16(<vscale x 32 x half> %x) ret <vscale x 32 x half> %a } @@ -227,8 +517,8 @@ declare <vscale x 16 x float> @llvm.floor.nxv16f32(<vscale x 16 x float>) define <vscale x 1 x double> @floor_nxv1f64(<vscale x 1 x double> %x) { ; CHECK-LABEL: floor_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui 
a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -247,8 +537,8 @@ declare <vscale x 1 x double> @llvm.floor.nxv1f64(<vscale x 1 x double>) define <vscale x 2 x double> @floor_nxv2f64(<vscale x 2 x double> %x) { ; CHECK-LABEL: floor_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -267,8 +557,8 @@ declare <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double>) define <vscale x 4 x double> @floor_nxv4f64(<vscale x 4 x double> %x) { ; CHECK-LABEL: floor_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -287,8 +577,8 @@ declare <vscale x 4 x double> @llvm.floor.nxv4f64(<vscale x 4 x double>) define <vscale x 8 x double> @floor_nxv8f64(<vscale x 8 x double> %x) { ; CHECK-LABEL: floor_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI20_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index d996a9c05aca..b5c40fbfaac6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -5545,457 
+5545,24 @@ define void @trunc_v8f16(ptr %x) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMIN-ZFH-RV32-LABEL: trunc_v8f16: -; ZVFHMIN-ZFH-RV32: # %bb.0: -; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFH-RV32-NEXT: lui a1, %hi(.LCPI115_0) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, %lo(.LCPI115_0)(a1) -; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB115_2 -; ZVFHMIN-ZFH-RV32-NEXT: # %bb.1: -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa4, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa3, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_2: -; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 0(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa1 -; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB115_4 -; ZVFHMIN-ZFH-RV32-NEXT: # %bb.3: -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa1, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa1, fa3, fa1 -; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_4: -; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 4(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa2 -; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB115_6 -; ZVFHMIN-ZFH-RV32-NEXT: # %bb.5: -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa2, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa2, fa3, fa2 -; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_6: -; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 6(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa0, fa3 -; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa0, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa1 -; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB115_8 -; ZVFHMIN-ZFH-RV32-NEXT: # %bb.7: -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h 
a1, fa3, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa1, a1, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa3, fa1, fa3 -; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_8: -; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 10(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa4, fa1 -; ZVFHMIN-ZFH-RV32-NEXT: flt.h a3, fa4, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFH-RV32-NEXT: beqz a3, .LBB115_10 -; ZVFHMIN-ZFH-RV32-NEXT: # %bb.9: -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a2, fa1, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa4, a2, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa1, fa4, fa1 -; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_10: -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 8(sp) -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa2 -; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa2, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: flt.h a3, fa2, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa1 -; ZVFHMIN-ZFH-RV32-NEXT: beqz a3, .LBB115_12 -; ZVFHMIN-ZFH-RV32-NEXT: # %bb.11: -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a3, fa4, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa2, a3, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa2, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_12: -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 12(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa3 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a3 -; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa4, fa2 -; ZVFHMIN-ZFH-RV32-NEXT: flt.h a3, fa4, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV32-NEXT: beqz a3, .LBB115_14 -; ZVFHMIN-ZFH-RV32-NEXT: # %bb.13: -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa2, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa4, a1, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa2, fa4, fa2 -; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_14: -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp) -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa2 -; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: flt.h a2, fa3, fa5 -; 
ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV32-NEXT: beqz a2, .LBB115_16 -; ZVFHMIN-ZFH-RV32-NEXT: # %bb.15: -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa4, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa5, a1, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa5, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_16: -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFH-RV32-NEXT: ret -; -; ZVFHMIN-ZFH-RV64-LABEL: trunc_v8f16: -; ZVFHMIN-ZFH-RV64: # %bb.0: -; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFH-RV64-NEXT: lui a1, %hi(.LCPI115_0) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, %lo(.LCPI115_0)(a1) -; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB115_2 -; ZVFHMIN-ZFH-RV64-NEXT: # %bb.1: -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_2: -; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 0(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa1 -; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB115_4 -; ZVFHMIN-ZFH-RV64-NEXT: # %bb.3: -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa1, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa1, fa3, fa1 -; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_4: -; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 4(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa2 -; 
ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB115_6 -; ZVFHMIN-ZFH-RV64-NEXT: # %bb.5: -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa2, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa2, fa3, fa2 -; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_6: -; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 6(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa0, fa3 -; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa0, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa1 -; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB115_8 -; ZVFHMIN-ZFH-RV64-NEXT: # %bb.7: -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa3, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa1, a1, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa3, fa1, fa3 -; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_8: -; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 10(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa4, fa1 -; ZVFHMIN-ZFH-RV64-NEXT: flt.h a3, fa4, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFH-RV64-NEXT: beqz a3, .LBB115_10 -; ZVFHMIN-ZFH-RV64-NEXT: # %bb.9: -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a2, fa1, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa4, a2, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa1, fa4, fa1 -; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_10: -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 8(sp) -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa2 -; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa2, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: flt.h a3, fa2, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa1 -; ZVFHMIN-ZFH-RV64-NEXT: beqz a3, .LBB115_12 -; ZVFHMIN-ZFH-RV64-NEXT: # %bb.11: -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a3, fa4, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa2, a3, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa2, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_12: -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 12(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa3 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v9, a3 -; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa4, fa2 
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a3, fa4, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV64-NEXT: beqz a3, .LBB115_14 -; ZVFHMIN-ZFH-RV64-NEXT: # %bb.13: -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa2, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa4, a1, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa2, fa4, fa2 -; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_14: -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 14(sp) -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa2 -; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: flt.h a2, fa3, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV64-NEXT: beqz a2, .LBB115_16 -; ZVFHMIN-ZFH-RV64-NEXT: # %bb.15: -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa5, a1, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa5, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_16: -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFH-RV64-NEXT: ret -; -; ZVFHMIN-ZFHIN-RV32-LABEL: trunc_v8f16: -; ZVFHMIN-ZFHIN-RV32: # %bb.0: -; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 307200 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.w.x fa5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB115_2 -; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.1: -; 
ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a1, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_2: -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa2, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa2 -; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB115_4 -; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.3: -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa2, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a1, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa2, fa3, fa2 -; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_4: -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa3, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa1, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa1, fa2 -; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB115_6 -; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.5: -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa3, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa2, a1, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa3, fa2, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_6: -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa0, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa2, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa0 -; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa0, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa1 -; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB115_8 -; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.7: -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa1, a1, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa1, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_8: -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa1, 10(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa2 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa2, fa1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa1, fa2 -; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a2, fa1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa3, fa3 -; 
ZVFHMIN-ZFHIN-RV32-NEXT: beqz a2, .LBB115_10 -; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.9: -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a2, fa2, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa1, a2, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa2, fa1, fa2 -; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_10: -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa1, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa2, fa2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa1, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a3, fa1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa2 -; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a3, .LBB115_12 -; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.11: -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a3, fa4, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa2, a3, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa2, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_12: -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a3, fa3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a3, .LBB115_14 -; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.13: -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a2, fa4, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a2, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_14: -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4 -; 
ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a2, fa3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a2, .LBB115_16 -; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.15: -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa5, a1, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa5, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_16: -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa5, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: ret -; -; ZVFHMIN-ZFHIN-RV64-LABEL: trunc_v8f16: -; ZVFHMIN-ZFHIN-RV64: # %bb.0: -; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 307200 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.w.x fa5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB115_2 -; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.1: -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_2: -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa2, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa2 -; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB115_4 -; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.3: -; 
ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa2, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa2, fa3, fa2 -; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_4: -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa3, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa1, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa1, fa2 -; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB115_6 -; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.5: -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa3, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa2, a1, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa3, fa2, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_6: -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa0, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa2, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa0 -; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa0, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa1 -; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB115_8 -; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.7: -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa1, a1, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa1, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_8: -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa1, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa2 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa2, fa1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa1, fa2 -; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a2, fa1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa3, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a2, .LBB115_10 -; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.9: -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a2, fa2, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa1, a2, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa2, fa1, fa2 -; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_10: -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa1, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa3, 
fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa2, fa2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa1, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a3, fa1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa2 -; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a3, .LBB115_12 -; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.11: -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a3, fa4, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa2, a3, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa2, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_12: -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a3, fa3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a3, .LBB115_14 -; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.13: -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a2, fa4, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a2, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_14: -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a2, fa3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a2, .LBB115_16 -; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.15: -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa5, a1, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa5, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_16: -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa5, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; 
ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: ret +; ZVFHMIN-LABEL: trunc_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a1, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vse16.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = call <8 x half> @llvm.trunc.v8f16(<8 x half> %a) store <8 x half> %b, ptr %x @@ -6020,461 +5587,25 @@ define void @trunc_v6f16(ptr %x) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMIN-ZFH-RV32-LABEL: trunc_v6f16: -; ZVFHMIN-ZFH-RV32: # %bb.0: -; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma -; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp -; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFH-RV32-NEXT: lui a1, %hi(.LCPI116_0) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, %lo(.LCPI116_0)(a1) -; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB116_2 -; ZVFHMIN-ZFH-RV32-NEXT: # %bb.1: -; ZVFHMIN-ZFH-RV32-NEXT: 
fcvt.w.h a1, fa4, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa3, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_2: -; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 0(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa1 -; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB116_4 -; ZVFHMIN-ZFH-RV32-NEXT: # %bb.3: -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa1, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa1, fa3, fa1 -; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_4: -; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 4(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa2 -; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB116_6 -; ZVFHMIN-ZFH-RV32-NEXT: # %bb.5: -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa2, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa2, fa3, fa2 -; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_6: -; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 6(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa0, fa3 -; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa0, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa1 -; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB116_8 -; ZVFHMIN-ZFH-RV32-NEXT: # %bb.7: -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa3, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa1, a1, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa3, fa1, fa3 -; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_8: -; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 10(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa4, fa1 -; ZVFHMIN-ZFH-RV32-NEXT: flt.h a3, fa4, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFH-RV32-NEXT: beqz a3, .LBB116_10 -; ZVFHMIN-ZFH-RV32-NEXT: # %bb.9: -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a2, fa1, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa4, a2, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa1, fa4, fa1 -; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_10: -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 8(sp) -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa2 -; ZVFHMIN-ZFH-RV32-NEXT: 
fabs.h fa2, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: flt.h a3, fa2, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa1 -; ZVFHMIN-ZFH-RV32-NEXT: beqz a3, .LBB116_12 -; ZVFHMIN-ZFH-RV32-NEXT: # %bb.11: -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a3, fa4, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa2, a3, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa2, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_12: -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 12(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa3 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a3 -; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa4, fa2 -; ZVFHMIN-ZFH-RV32-NEXT: flt.h a3, fa4, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV32-NEXT: beqz a3, .LBB116_14 -; ZVFHMIN-ZFH-RV32-NEXT: # %bb.13: -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa2, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa4, a1, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa2, fa4, fa2 -; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_14: -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp) -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa2 -; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: flt.h a2, fa3, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV32-NEXT: beqz a2, .LBB116_16 -; ZVFHMIN-ZFH-RV32-NEXT: # %bb.15: -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa4, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa5, a1, rtz -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa5, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_16: -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu -; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFH-RV32-NEXT: ret -; -; ZVFHMIN-ZFH-RV64-LABEL: trunc_v6f16: -; ZVFHMIN-ZFH-RV64: # %bb.0: -; ZVFHMIN-ZFH-RV64-NEXT: addi 
sp, sp, -16 -; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma -; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp -; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFH-RV64-NEXT: lui a1, %hi(.LCPI116_0) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, %lo(.LCPI116_0)(a1) -; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB116_2 -; ZVFHMIN-ZFH-RV64-NEXT: # %bb.1: -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_2: -; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 0(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa1 -; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB116_4 -; ZVFHMIN-ZFH-RV64-NEXT: # %bb.3: -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa1, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa1, fa3, fa1 -; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_4: -; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 4(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa2 -; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB116_6 -; ZVFHMIN-ZFH-RV64-NEXT: # %bb.5: -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa2, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa2, fa3, fa2 -; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_6: -; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 6(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa0, fa3 -; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa0, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa1 -; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB116_8 -; ZVFHMIN-ZFH-RV64-NEXT: # %bb.7: -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa3, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa1, a1, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa3, fa1, fa3 -; ZVFHMIN-ZFH-RV64-NEXT: 
.LBB116_8: -; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 10(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa4, fa1 -; ZVFHMIN-ZFH-RV64-NEXT: flt.h a3, fa4, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFH-RV64-NEXT: beqz a3, .LBB116_10 -; ZVFHMIN-ZFH-RV64-NEXT: # %bb.9: -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a2, fa1, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa4, a2, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa1, fa4, fa1 -; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_10: -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 8(sp) -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa2 -; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa2, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: flt.h a3, fa2, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa1 -; ZVFHMIN-ZFH-RV64-NEXT: beqz a3, .LBB116_12 -; ZVFHMIN-ZFH-RV64-NEXT: # %bb.11: -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a3, fa4, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa2, a3, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa2, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_12: -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 12(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa3 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v9, a3 -; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa4, fa2 -; ZVFHMIN-ZFH-RV64-NEXT: flt.h a3, fa4, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV64-NEXT: beqz a3, .LBB116_14 -; ZVFHMIN-ZFH-RV64-NEXT: # %bb.13: -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa2, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa4, a1, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa2, fa4, fa2 -; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_14: -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 14(sp) -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa2 -; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: flt.h a2, fa3, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV64-NEXT: beqz a2, .LBB116_16 -; ZVFHMIN-ZFH-RV64-NEXT: # %bb.15: -; 
ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa5, a1, rtz -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa5, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_16: -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu -; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFH-RV64-NEXT: ret -; -; ZVFHMIN-ZFHIN-RV32-LABEL: trunc_v6f16: -; ZVFHMIN-ZFHIN-RV32: # %bb.0: -; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp -; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 307200 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.w.x fa5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB116_2 -; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.1: -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a1, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_2: -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa2, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa2 -; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB116_4 -; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.3: -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa2, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a1, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa2, fa3, fa2 -; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_4: -; ZVFHMIN-ZFHIN-RV32-NEXT: flh 
fa3, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa3, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa1, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa1, fa2 -; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB116_6 -; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.5: -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa3, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa2, a1, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa3, fa2, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_6: -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa0, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa2, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa0 -; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa0, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa1 -; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB116_8 -; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.7: -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa1, a1, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa1, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_8: -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa1, 10(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa2 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa2, fa1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa1, fa2 -; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a2, fa1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa3, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a2, .LBB116_10 -; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.9: -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a2, fa2, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa1, a2, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa2, fa1, fa2 -; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_10: -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa1, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa2, fa2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa1, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a3, fa1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: 
fmv.x.h a2, fa2 -; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a3, .LBB116_12 -; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.11: -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a3, fa4, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa2, a3, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa2, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_12: -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a3, fa3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a3, .LBB116_14 -; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.13: -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a2, fa4, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a2, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_14: -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a2, fa3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a2, .LBB116_16 -; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.15: -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa5, a1, rtz -; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa5, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_16: -; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa5, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu -; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t -; 
ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: ret -; -; ZVFHMIN-ZFHIN-RV64-LABEL: trunc_v6f16: -; ZVFHMIN-ZFHIN-RV64: # %bb.0: -; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp -; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 307200 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.w.x fa5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB116_2 -; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.1: -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_2: -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa2, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa2 -; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB116_4 -; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.3: -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa2, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa2, fa3, fa2 -; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_4: -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa3, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa1, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa1, fa2 -; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB116_6 -; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.5: -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa3, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa2, a1, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa3, fa2, fa3 
-; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_6: -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa0, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa2, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa0 -; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa0, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa1 -; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB116_8 -; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.7: -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa1, a1, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa1, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_8: -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa1, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa2 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa2, fa1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa1, fa2 -; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a2, fa1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa3, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a2, .LBB116_10 -; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.9: -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a2, fa2, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa1, a2, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa2, fa1, fa2 -; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_10: -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa1, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa2, fa2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa1, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a3, fa1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa2 -; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a3, .LBB116_12 -; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.11: -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a3, fa4, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa2, a3, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa2, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_12: -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 
12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a3, fa3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a3, .LBB116_14 -; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.13: -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a2, fa4, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a2, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_14: -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a2, fa3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a2, .LBB116_16 -; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.15: -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa5, a1, rtz -; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa5, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_16: -; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa5, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu -; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: ret +; ZVFHMIN-LABEL: trunc_v6f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, 
v9 +; ZVFHMIN-NEXT: lui a1, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vse16.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.trunc.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll index 45334ea8648f..03d1fb6c8d29 100644 --- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -1,22 +1,428 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s \ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ ; RUN: --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 
-mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ ; RUN: --check-prefixes=CHECK,ZVFHMIN +declare <vscale x 1 x bfloat> @llvm.vp.floor.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x bfloat> @vp_floor_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_floor_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v9, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.floor.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vp_floor_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_floor_nxv1bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; 
CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.floor.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +declare <vscale x 2 x bfloat> @llvm.vp.floor.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x bfloat> @vp_floor_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_floor_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v9, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.floor.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vp_floor_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_floor_nxv2bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, 
v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.floor.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +declare <vscale x 4 x bfloat> @llvm.vp.floor.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x bfloat> @vp_floor_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_floor_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v12, v10, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmflt.vf v9, v12, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.floor.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vp_floor_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_floor_nxv4bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v8, v10 +; CHECK-NEXT: lui 
a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.floor.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +declare <vscale x 8 x bfloat> @llvm.vp.floor.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x bfloat> @vp_floor_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_floor_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v16, v12, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmflt.vf v10, v16, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.floor.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vp_floor_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_floor_nxv8bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, 
ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v8, v12 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.floor.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +declare <vscale x 16 x bfloat> @llvm.vp.floor.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x bfloat> @vp_floor_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_floor_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.floor.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> 
@vp_floor_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_floor_nxv16bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v16 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.floor.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +declare <vscale x 32 x bfloat> @llvm.vp.floor.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x bfloat> @vp_floor_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_floor_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v17, v0, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v 
v24, v12 +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t +; CHECK-NEXT: lui a2, 307200 +; CHECK-NEXT: fmv.w.x fa5, a2 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t +; CHECK-NEXT: fsrmi a2, 2 +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t +; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: bltu a0, a1, .LBB10_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0 +; CHECK-NEXT: vmv1r.v v8, v16 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v16, v24, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; 
CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.floor.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vp_floor_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_floor_nxv32bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v16, v16, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t +; CHECK-NEXT: lui a2, 307200 +; CHECK-NEXT: fmv.w.x fa5, a2 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v16, v8, fa5, v0.t +; CHECK-NEXT: fsrmi a2, 2 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t +; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: bltu a0, a1, .LBB11_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; 
CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.floor.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} declare <vscale x 1 x half> @llvm.vp.floor.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32) define <vscale x 1 x half> @vp_floor_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI12_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -57,8 +463,8 @@ define <vscale x 1 x half> @vp_floor_nxv1f16(<vscale x 1 x half> %va, <vscale x define <vscale x 1 x half> @vp_floor_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv1f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI13_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 @@ -97,8 +503,8 @@ declare 
<vscale x 2 x half> @llvm.vp.floor.nxv2f16(<vscale x 2 x half>, <vscale define <vscale x 2 x half> @vp_floor_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI14_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu @@ -139,8 +545,8 @@ define <vscale x 2 x half> @vp_floor_nxv2f16(<vscale x 2 x half> %va, <vscale x define <vscale x 2 x half> @vp_floor_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI15_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 @@ -179,8 +585,8 @@ declare <vscale x 4 x half> @llvm.vp.floor.nxv4f16(<vscale x 4 x half>, <vscale define <vscale x 4 x half> @vp_floor_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI16_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -223,8 +629,8 @@ define <vscale x 4 x half> @vp_floor_nxv4f16(<vscale x 4 x half> %va, <vscale x define <vscale x 4 x half> @vp_floor_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI17_0) +; ZVFH-NEXT: flh fa5, 
%lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 @@ -263,8 +669,8 @@ declare <vscale x 8 x half> @llvm.vp.floor.nxv8f16(<vscale x 8 x half>, <vscale define <vscale x 8 x half> @vp_floor_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI18_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1) ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t @@ -309,8 +715,8 @@ define <vscale x 8 x half> @vp_floor_nxv8f16(<vscale x 8 x half> %va, <vscale x define <vscale x 8 x half> @vp_floor_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 @@ -349,8 +755,8 @@ declare <vscale x 16 x half> @llvm.vp.floor.nxv16f16(<vscale x 16 x half>, <vsca define <vscale x 16 x half> @vp_floor_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI8_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI20_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1) ; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t @@ -395,8 +801,8 @@ define <vscale x 16 x half> @vp_floor_nxv16f16(<vscale x 16 x half> %va, <vscale define <vscale x 16 x half> @vp_floor_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, 
%hi(.LCPI9_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI21_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 @@ -435,8 +841,8 @@ declare <vscale x 32 x half> @llvm.vp.floor.nxv32f16(<vscale x 32 x half>, <vsca define <vscale x 32 x half> @vp_floor_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI10_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI22_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1) ; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v24, v8, v0.t @@ -491,10 +897,10 @@ define <vscale x 32 x half> @vp_floor_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB10_2: +; ZVFHMIN-NEXT: .LBB22_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 @@ -533,8 +939,8 @@ define <vscale x 32 x half> @vp_floor_nxv32f16(<vscale x 32 x half> %va, <vscale define <vscale x 32 x half> @vp_floor_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI11_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5 @@ -586,10 +992,10 @@ define <vscale x 32 x half> @vp_floor_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, 
v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB11_2: +; ZVFHMIN-NEXT: .LBB23_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 @@ -834,8 +1240,8 @@ declare <vscale x 1 x double> @llvm.vp.floor.nxv1f64(<vscale x 1 x double>, <vsc define <vscale x 1 x double> @vp_floor_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI22_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI34_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -855,8 +1261,8 @@ define <vscale x 1 x double> @vp_floor_nxv1f64(<vscale x 1 x double> %va, <vscal define <vscale x 1 x double> @vp_floor_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv1f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI23_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI35_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -876,8 +1282,8 @@ declare <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double>, <vsc define <vscale x 2 x double> @vp_floor_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI24_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI36_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1) ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli 
zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t @@ -899,8 +1305,8 @@ define <vscale x 2 x double> @vp_floor_nxv2f64(<vscale x 2 x double> %va, <vscal define <vscale x 2 x double> @vp_floor_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv2f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI25_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI37_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -920,8 +1326,8 @@ declare <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double>, <vsc define <vscale x 4 x double> @vp_floor_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI38_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1) ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t @@ -943,8 +1349,8 @@ define <vscale x 4 x double> @vp_floor_nxv4f64(<vscale x 4 x double> %va, <vscal define <vscale x 4 x double> @vp_floor_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv4f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI27_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI39_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -964,8 +1370,8 @@ declare <vscale x 7 x double> @llvm.vp.floor.nxv7f64(<vscale x 7 x double>, <vsc define <vscale x 7 x double> @vp_floor_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI28_0) -; 
CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI40_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1) ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -987,8 +1393,8 @@ define <vscale x 7 x double> @vp_floor_nxv7f64(<vscale x 7 x double> %va, <vscal define <vscale x 7 x double> @vp_floor_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv7f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI29_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI41_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -1008,8 +1414,8 @@ declare <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double>, <vsc define <vscale x 8 x double> @vp_floor_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI30_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI42_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1) ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -1031,8 +1437,8 @@ define <vscale x 8 x double> @vp_floor_nxv8f64(<vscale x 8 x double> %va, <vscal define <vscale x 8 x double> @vp_floor_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI31_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI43_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -1065,8 +1471,8 @@ define <vscale x 16 x double> @vp_floor_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: vsetvli a3, zero, 
e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v6, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: lui a3, %hi(.LCPI32_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) +; CHECK-NEXT: lui a3, %hi(.LCPI44_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 @@ -1087,10 +1493,10 @@ define <vscale x 16 x double> @vp_floor_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: .LBB44_2: ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -1118,8 +1524,8 @@ define <vscale x 16 x double> @vp_floor_nxv16f64_unmasked(<vscale x 16 x double> ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: lui a3, %hi(.LCPI33_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI33_0)(a3) +; CHECK-NEXT: lui a3, %hi(.LCPI45_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 @@ -1132,10 +1538,10 @@ define <vscale x 16 x double> @vp_floor_nxv16f64_unmasked(<vscale x 16 x double> ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-NEXT: bltu a0, a1, .LBB45_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll index 05896d8ef6ff..d8c3ab27cfad 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll @@ -1,21 +1,200 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s 
--check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +declare <vscale x 1 x bfloat> @llvm.maximum.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>) + +define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) { +; CHECK-LABEL: vfmax_nxv1bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v9, v9 +; CHECK-NEXT: vmfeq.vv v8, v10, v10 +; CHECK-NEXT: vmerge.vvm v11, v9, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vfmax.vv v9, v8, v11 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.maximum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) + ret <vscale x 1 x bfloat> %v +} + +declare <vscale x 2 x bfloat> @llvm.maximum.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>) + +define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { +; CHECK-LABEL: vfmax_nxv2bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v9, v9 +; CHECK-NEXT: vmfeq.vv v8, v10, v10 +; CHECK-NEXT: vmerge.vvm v11, v9, v10, v0 +; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vfmax.vv v9, v8, v11 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret 
+ %v = call <vscale x 2 x bfloat> @llvm.maximum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) + ret <vscale x 2 x bfloat> %v +} + +declare <vscale x 4 x bfloat> @llvm.maximum.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>) + +define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { +; CHECK-LABEL: vfmax_nxv4bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v8, v10, v10 +; CHECK-NEXT: vmerge.vvm v14, v12, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0 +; CHECK-NEXT: vfmax.vv v10, v8, v14 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.maximum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) + ret <vscale x 4 x bfloat> %v +} + +declare <vscale x 8 x bfloat> @llvm.maximum.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>) + +define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { +; CHECK-LABEL: vfmax_nxv8bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v8, v12, v12 +; CHECK-NEXT: vmerge.vvm v20, v16, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0 +; CHECK-NEXT: vfmax.vv v12, v8, v20 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.maximum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) + ret <vscale x 8 x bfloat> %v +} + +declare 
<vscale x 16 x bfloat> @llvm.maximum.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>) + +define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) { +; CHECK-LABEL: vfmax_nxv16bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmfeq.vv v7, v16, v16 +; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v16, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.maximum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) + ret <vscale x 16 x bfloat> %v +} + +declare <vscale x 32 x bfloat> @llvm.maximum.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>) + +define <vscale x 32 x bfloat> @vfmax_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) nounwind { +; CHECK-LABEL: vfmax_nxv32bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; 
CHECK-NEXT: vmv8r.v v0, v8 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v3, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vmv1r.v v0, v3 +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v4 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v7, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vfmax.vv v16, v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.maximum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) + ret <vscale x 32 x bfloat> %v +} + declare <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>) define <vscale x 1 x half> @vfmax_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1 x half> %b) { diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll 
index ab07fff59b21..320db35770cb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll @@ -1,13 +1,541 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v,+m \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v,+m \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 \ +; RUN: -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 \ +; RUN: -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +declare <vscale x 1 x bfloat> @llvm.vp.maximum.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x bfloat> @vfmax_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext 
%evl) { +; CHECK-LABEL: vfmax_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v11, v11, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmerge.vvm v9, v11, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfmax.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.maximum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfmax_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv1bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v11, v11 +; CHECK-NEXT: vmerge.vvm v9, v10, v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v11, v10, v0 +; CHECK-NEXT: vfmax.vv v9, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.maximum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +declare <vscale x 2 x bfloat> 
@llvm.vp.maximum.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x bfloat> @vfmax_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v11, v11, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmerge.vvm v9, v11, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfmax.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.maximum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfmax_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv2bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v11, v11 +; CHECK-NEXT: vmerge.vvm v9, v10, v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v11, v10, v0 +; CHECK-NEXT: vfmax.vv v9, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x 
bfloat> @llvm.vp.maximum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +declare <vscale x 4 x bfloat> @llvm.vp.maximum.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x bfloat> @vfmax_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v12, v12, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmerge.vvm v16, v12, v14, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmfeq.vv v8, v14, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v14, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfmax.vv v10, v8, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.maximum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfmax_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv4bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v12, v12 +; CHECK-NEXT: 
vmerge.vvm v14, v10, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v12, v10, v0 +; CHECK-NEXT: vfmax.vv v10, v8, v14 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.maximum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +declare <vscale x 8 x bfloat> @llvm.vp.maximum.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x bfloat> @vfmax_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v16, v16, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmerge.vvm v24, v16, v20, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfeq.vv v8, v20, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v20, v16, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfmax.vv v12, v8, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.maximum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfmax_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv8bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; 
CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v16, v16 +; CHECK-NEXT: vmerge.vvm v20, v12, v16, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v16, v12, v0 +; CHECK-NEXT: vfmax.vv v12, v8, v20 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.maximum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +declare <vscale x 16 x bfloat> @llvm.vp.maximum.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x bfloat> @vfmax_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v24, v24, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v8, v16, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, 
v16, v24, v0 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v16, v8, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.maximum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfmax_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv16bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v7, v24, v24 +; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v16, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> 
@llvm.vp.maximum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +declare <vscale x 32 x bfloat> @llvm.vp.maximum.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 34 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv8r.v v0, v8 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 5 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vs1r.v v24, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v24, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v4 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v12, v24, v24, v0.t +; CHECK-NEXT: vmv8r.v v0, v16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a4, 24 +; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: add a2, sp, 
a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v4 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmfeq.vv v12, v16, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v16, v16, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: bltu a0, a1, .LBB10_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li 
a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmerge.vvm v24, v24, v16, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfeq.vv v8, v16, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v16, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 34 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.maximum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 
32 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv32bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; CHECK-NEXT: vmset.m v24 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v7, v24, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v12, v24, v24, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a4, 24 +; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: 
vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v12, v24, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v16, v16, v8, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v4, v16 +; CHECK-NEXT: bltu a0, a1, .LBB11_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v3, v16, v16 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: vmv1r.v v0, v3 +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vfmax.vv v16, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v0, v16 +; CHECK-NEXT: vmv8r.v v8, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.maximum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} declare <vscale x 1 x half> @llvm.vp.maximum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x 
i1>, i32) define <vscale x 1 x half> @vfmax_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { @@ -509,10 +1037,10 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB10_2: +; ZVFHMIN-NEXT: .LBB22_2: ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 @@ -656,10 +1184,10 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: vfmax.vv v16, v16, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB11_2: +; ZVFHMIN-NEXT: .LBB23_2: ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 @@ -1093,10 +1621,10 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a2, a1, .LBB28_2 +; CHECK-NEXT: bltu a2, a1, .LBB40_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: .LBB28_2: +; CHECK-NEXT: .LBB40_2: ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 18 ; CHECK-NEXT: mul a0, a0, a1 @@ -1202,10 +1730,10 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64_unmasked(<vscale x 16 x double> ; CHECK-NEXT: vfmax.vv v8, v16, v8 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a2, a1, .LBB29_2 +; CHECK-NEXT: bltu a2, a1, .LBB41_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: 
.LBB29_2: +; CHECK-NEXT: .LBB41_2: ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll index e94259392498..2371840002f4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll @@ -1,21 +1,200 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s 
--check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +declare <vscale x 1 x bfloat> @llvm.minimum.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>) + +define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) { +; CHECK-LABEL: vfmin_nxv1bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v9, v9 +; CHECK-NEXT: vmfeq.vv v8, v10, v10 +; CHECK-NEXT: vmerge.vvm v11, v9, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vfmin.vv v9, v8, v11 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.minimum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) + ret <vscale x 1 x bfloat> %v +} + +declare <vscale x 2 x bfloat> @llvm.minimum.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>) + +define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { +; CHECK-LABEL: vfmin_nxv2bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v9, v9 +; CHECK-NEXT: vmfeq.vv v8, v10, v10 +; 
CHECK-NEXT: vmerge.vvm v11, v9, v10, v0 +; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vfmin.vv v9, v8, v11 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.minimum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) + ret <vscale x 2 x bfloat> %v +} + +declare <vscale x 4 x bfloat> @llvm.minimum.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>) + +define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { +; CHECK-LABEL: vfmin_nxv4bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v8, v10, v10 +; CHECK-NEXT: vmerge.vvm v14, v12, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0 +; CHECK-NEXT: vfmin.vv v10, v8, v14 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.minimum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) + ret <vscale x 4 x bfloat> %v +} + +declare <vscale x 8 x bfloat> @llvm.minimum.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>) + +define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { +; CHECK-LABEL: vfmin_nxv8bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v8, v12, v12 +; CHECK-NEXT: vmerge.vvm v20, v16, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0 +; CHECK-NEXT: vfmin.vv v12, v8, v20 +; 
CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.minimum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) + ret <vscale x 8 x bfloat> %v +} + +declare <vscale x 16 x bfloat> @llvm.minimum.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>) + +define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) { +; CHECK-LABEL: vfmin_nxv16bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmfeq.vv v7, v16, v16 +; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v16, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.minimum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) + ret <vscale x 16 x bfloat> %v +} + +declare <vscale x 32 x bfloat> @llvm.minimum.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>) + +define <vscale x 32 x bfloat> @vfmin_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) nounwind { +; CHECK-LABEL: 
vfmin_nxv32bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v0, v8 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v3, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vmv1r.v v0, v3 +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v4 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v7, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vfmin.vv v16, v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.minimum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) + ret <vscale x 32 x bfloat> %v +} + declare <vscale x 1 x half> 
@llvm.minimum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>) define <vscale x 1 x half> @vfmin_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1 x half> %b) { diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll index fc5b11284dab..03e3969f9141 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll @@ -1,13 +1,541 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v,+m \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v,+m \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 \ +; RUN: -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 \ +; RUN: -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN 
+declare <vscale x 1 x bfloat> @llvm.vp.minimum.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x bfloat> @vfmin_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v11, v11, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmerge.vvm v9, v11, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfmin.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.minimum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfmin_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv1bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v11, v11 +; CHECK-NEXT: vmerge.vvm v9, v10, v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v11, v10, v0 +; CHECK-NEXT: vfmin.vv v9, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; 
CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.minimum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +declare <vscale x 2 x bfloat> @llvm.vp.minimum.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x bfloat> @vfmin_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v11, v11, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmerge.vvm v9, v11, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfmin.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.minimum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfmin_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv2bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v11, v11 +; CHECK-NEXT: vmerge.vvm v9, v10, v11, v0 
+; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v11, v10, v0 +; CHECK-NEXT: vfmin.vv v9, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.minimum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +declare <vscale x 4 x bfloat> @llvm.vp.minimum.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x bfloat> @vfmin_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v12, v12, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmerge.vvm v16, v12, v14, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmfeq.vv v8, v14, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v14, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfmin.vv v10, v8, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.minimum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfmin_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv4bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, 
ta, ma +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v12, v12 +; CHECK-NEXT: vmerge.vvm v14, v10, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v12, v10, v0 +; CHECK-NEXT: vfmin.vv v10, v8, v14 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.minimum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +declare <vscale x 8 x bfloat> @llvm.vp.minimum.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x bfloat> @vfmin_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v16, v16, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmerge.vvm v24, v16, v20, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfeq.vv v8, v20, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v20, v16, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfmin.vv v12, v8, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.minimum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> 
@vfmin_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv8bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v16, v16 +; CHECK-NEXT: vmerge.vvm v20, v12, v16, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v16, v12, v0 +; CHECK-NEXT: vfmin.vv v12, v8, v20 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.minimum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +declare <vscale x 16 x bfloat> @llvm.vp.minimum.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x bfloat> @vfmin_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v24, v24, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; 
CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v8, v16, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v16, v8, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.minimum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfmin_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv16bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v7, v24, v24 +; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v16, v8, v16 +; CHECK-NEXT: 
vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.minimum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +declare <vscale x 32 x bfloat> @llvm.vp.minimum.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 34 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv8r.v v0, v8 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 5 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vs1r.v v24, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v24, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v4 +; CHECK-NEXT: 
vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v12, v24, v24, v0.t +; CHECK-NEXT: vmv8r.v v0, v16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a4, 24 +; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v4 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmfeq.vv v12, v16, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v16, v16, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: bltu a0, a1, .LBB10_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl1r.v v0, (a1) # 
Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmerge.vvm v24, v24, v16, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfeq.vv v8, v16, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v16, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 34 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x 
bfloat> @llvm.vp.minimum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv32bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; CHECK-NEXT: vmset.m v24 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v7, v24, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v12, v24, v24, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a4, 24 +; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, 
a3, e32, m8, ta, ma +; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v12, v24, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v16, v16, v8, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v4, v16 +; CHECK-NEXT: bltu a0, a1, .LBB11_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v3, v16, v16 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: vmv1r.v v0, v3 +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vfmin.vv v16, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v0, v16 +; CHECK-NEXT: vmv8r.v v8, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> 
@llvm.vp.minimum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} declare <vscale x 1 x half> @llvm.vp.minimum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32) define <vscale x 1 x half> @vfmin_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { @@ -509,10 +1037,10 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB10_2: +; ZVFHMIN-NEXT: .LBB22_2: ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 @@ -656,10 +1184,10 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: vfmin.vv v16, v16, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB11_2: +; ZVFHMIN-NEXT: .LBB23_2: ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 @@ -1093,10 +1621,10 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a2, a1, .LBB28_2 +; CHECK-NEXT: bltu a2, a1, .LBB40_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: .LBB28_2: +; CHECK-NEXT: .LBB40_2: ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 18 ; CHECK-NEXT: mul a0, a0, a1 @@ -1202,10 +1730,10 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64_unmasked(<vscale 
x 16 x double> ; CHECK-NEXT: vfmin.vv v8, v16, v8 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a2, a1, .LBB29_2 +; CHECK-NEXT: bltu a2, a1, .LBB41_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: .LBB29_2: +; CHECK-NEXT: .LBB41_2: ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll index 9e14852305ca..9498c65ba9a1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll @@ -1,124 +1,438 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN -define <vscale x 1 x half> @nearbyint_nxv1f16(<vscale x 1 x half> %x) { -; CHECK-LABEL: nearbyint_nxv1f16: +define <vscale x 1 x bfloat> @nearbyint_nxv1bf16(<vscale x 1 x bfloat> %x) { +; CHECK-LABEL: nearbyint_nxv1bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: 
lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret - %a = call <vscale x 1 x half> @llvm.nearbyint.nxv1f16(<vscale x 1 x half> %x) - ret <vscale x 1 x half> %a + %a = call <vscale x 1 x bfloat> @llvm.nearbyint.nxv1bf16(<vscale x 1 x bfloat> %x) + ret <vscale x 1 x bfloat> %a } -declare <vscale x 1 x half> @llvm.nearbyint.nxv1f16(<vscale x 1 x half>) -define <vscale x 2 x half> @nearbyint_nxv2f16(<vscale x 2 x half> %x) { -; CHECK-LABEL: nearbyint_nxv2f16: +define <vscale x 2 x bfloat> @nearbyint_nxv2bf16(<vscale x 2 x bfloat> %x) { +; CHECK-LABEL: nearbyint_nxv2bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, 
e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret - %a = call <vscale x 2 x half> @llvm.nearbyint.nxv2f16(<vscale x 2 x half> %x) - ret <vscale x 2 x half> %a + %a = call <vscale x 2 x bfloat> @llvm.nearbyint.nxv2bf16(<vscale x 2 x bfloat> %x) + ret <vscale x 2 x bfloat> %a } -declare <vscale x 2 x half> @llvm.nearbyint.nxv2f16(<vscale x 2 x half>) -define <vscale x 4 x half> @nearbyint_nxv4f16(<vscale x 4 x half> %x) { -; CHECK-LABEL: nearbyint_nxv4f16: +define <vscale x 4 x bfloat> @nearbyint_nxv4bf16(<vscale x 4 x bfloat> %x) { +; CHECK-LABEL: nearbyint_nxv4bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v8, v10 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret - %a = call <vscale x 4 x half> @llvm.nearbyint.nxv4f16(<vscale x 4 x half> %x) - ret <vscale x 4 x half> %a + %a = call <vscale x 4 x bfloat> 
@llvm.nearbyint.nxv4bf16(<vscale x 4 x bfloat> %x) + ret <vscale x 4 x bfloat> %a } -declare <vscale x 4 x half> @llvm.nearbyint.nxv4f16(<vscale x 4 x half>) -define <vscale x 8 x half> @nearbyint_nxv8f16(<vscale x 8 x half> %x) { -; CHECK-LABEL: nearbyint_nxv8f16: +define <vscale x 8 x bfloat> @nearbyint_nxv8bf16(<vscale x 8 x bfloat> %x) { +; CHECK-LABEL: nearbyint_nxv8bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v8, v12 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret - %a = call <vscale x 8 x half> @llvm.nearbyint.nxv8f16(<vscale x 8 x half> %x) - ret <vscale x 8 x half> %a + %a = call <vscale x 8 x bfloat> @llvm.nearbyint.nxv8bf16(<vscale x 8 x bfloat> %x) + ret <vscale x 8 x bfloat> %a } -declare <vscale x 8 x half> @llvm.nearbyint.nxv8f16(<vscale x 8 x half>) -define <vscale x 16 x half> @nearbyint_nxv16f16(<vscale x 16 x half> %x) { -; CHECK-LABEL: nearbyint_nxv16f16: +define <vscale x 16 x bfloat> @nearbyint_nxv16bf16(<vscale x 16 x bfloat> %x) { +; CHECK-LABEL: nearbyint_nxv16bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma 
-; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v16 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret - %a = call <vscale x 16 x half> @llvm.nearbyint.nxv16f16(<vscale x 16 x half> %x) - ret <vscale x 16 x half> %a + %a = call <vscale x 16 x bfloat> @llvm.nearbyint.nxv16bf16(<vscale x 16 x bfloat> %x) + ret <vscale x 16 x bfloat> %a } -declare <vscale x 16 x half> @llvm.nearbyint.nxv16f16(<vscale x 16 x half>) -define <vscale x 32 x half> @nearbyint_nxv32f16(<vscale x 32 x half> %x) { -; CHECK-LABEL: nearbyint_nxv32f16: +define <vscale x 32 x bfloat> @nearbyint_nxv32bf16(<vscale x 32 x bfloat> %x) { +; CHECK-LABEL: nearbyint_nxv32bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; 
CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 ; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret + %a = call <vscale x 32 x bfloat> @llvm.nearbyint.nxv32bf16(<vscale x 32 x bfloat> %x) + ret <vscale x 32 x bfloat> %a +} + +define <vscale x 1 x half> @nearbyint_nxv1f16(<vscale x 1 x half> %x) { +; ZVFH-LABEL: nearbyint_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t 
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: fsflags a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nearbyint_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: frflags a0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 1 x half> @llvm.nearbyint.nxv1f16(<vscale x 1 x half> %x) + ret <vscale x 1 x half> %a +} +declare <vscale x 1 x half> @llvm.nearbyint.nxv1f16(<vscale x 1 x half>) + +define <vscale x 2 x half> @nearbyint_nxv2f16(<vscale x 2 x half> %x) { +; ZVFH-LABEL: nearbyint_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: fsflags a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nearbyint_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: frflags a0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: 
vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 2 x half> @llvm.nearbyint.nxv2f16(<vscale x 2 x half> %x) + ret <vscale x 2 x half> %a +} +declare <vscale x 2 x half> @llvm.nearbyint.nxv2f16(<vscale x 2 x half>) + +define <vscale x 4 x half> @nearbyint_nxv4f16(<vscale x 4 x half> %x) { +; ZVFH-LABEL: nearbyint_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: fsflags a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nearbyint_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v10 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: frflags a0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 4 x half> @llvm.nearbyint.nxv4f16(<vscale x 4 x half> %x) + ret <vscale x 4 x half> %a +} +declare <vscale x 4 x half> @llvm.nearbyint.nxv4f16(<vscale x 4 x half>) + +define <vscale x 8 x half> @nearbyint_nxv8f16(<vscale x 8 x half> %x) { +; ZVFH-LABEL: nearbyint_nxv8f16: +; ZVFH: # 
%bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFH-NEXT: fsflags a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nearbyint_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v12 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: frflags a0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 8 x half> @llvm.nearbyint.nxv8f16(<vscale x 8 x half> %x) + ret <vscale x 8 x half> %a +} +declare <vscale x 8 x half> @llvm.nearbyint.nxv8f16(<vscale x 8 x half>) + +define <vscale x 16 x half> @nearbyint_nxv16f16(<vscale x 16 x half> %x) { +; ZVFH-LABEL: nearbyint_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFH-NEXT: fsflags a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nearbyint_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v16 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: frflags a0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 16 x half> @llvm.nearbyint.nxv16f16(<vscale x 16 x half> %x) + ret <vscale x 16 x half> %a +} +declare <vscale x 16 x half> @llvm.nearbyint.nxv16f16(<vscale x 16 x half>) + +define <vscale x 32 x half> @nearbyint_nxv32f16(<vscale x 32 x half> %x) { +; ZVFH-LABEL: nearbyint_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFH-NEXT: fsflags a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nearbyint_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: sub sp, sp, a0 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v16 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; 
ZVFHMIN-NEXT: frflags a0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v24 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: frflags a0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 +; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %a = call <vscale x 32 x half> @llvm.nearbyint.nxv32f16(<vscale x 32 x half> %x) ret <vscale x 32 x half> %a } @@ -227,8 +541,8 @@ declare <vscale x 16 x float> @llvm.nearbyint.nxv16f32(<vscale x 16 x float>) define <vscale x 1 x double> @nearbyint_nxv1f64(<vscale x 1 x double> %x) { ; CHECK-LABEL: nearbyint_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -247,8 +561,8 @@ declare <vscale x 1 x double> @llvm.nearbyint.nxv1f64(<vscale x 1 x double>) define <vscale x 2 x double> 
@nearbyint_nxv2f64(<vscale x 2 x double> %x) { ; CHECK-LABEL: nearbyint_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -267,8 +581,8 @@ declare <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double>) define <vscale x 4 x double> @nearbyint_nxv4f64(<vscale x 4 x double> %x) { ; CHECK-LABEL: nearbyint_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -287,8 +601,8 @@ declare <vscale x 4 x double> @llvm.nearbyint.nxv4f64(<vscale x 4 x double>) define <vscale x 8 x double> @nearbyint_nxv8f64(<vscale x 8 x double> %x) { ; CHECK-LABEL: nearbyint_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI20_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll index fb77b7465494..7fac8949c551 100644 --- a/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll @@ -1,112 +1,372 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 
-mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN -define <vscale x 1 x half> @rint_nxv1f16(<vscale x 1 x half> %x) { -; CHECK-LABEL: rint_nxv1f16: +define <vscale x 1 x bfloat> @rint_nxv1bf16(<vscale x 1 x bfloat> %x) { +; CHECK-LABEL: rint_nxv1bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %a = call <vscale x 1 x bfloat> @llvm.rint.nxv1bf16(<vscale x 1 x bfloat> %x) + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 2 x bfloat> @rint_nxv2bf16(<vscale x 2 x bfloat> %x) { +; 
CHECK-LABEL: rint_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret + %a = call <vscale x 2 x bfloat> @llvm.rint.nxv2bf16(<vscale x 2 x bfloat> %x) + ret <vscale x 2 x bfloat> %a +} + +define <vscale x 4 x bfloat> @rint_nxv4bf16(<vscale x 4 x bfloat> %x) { +; CHECK-LABEL: rint_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v8, v10 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %a = call <vscale x 4 x bfloat> @llvm.rint.nxv4bf16(<vscale x 4 x bfloat> %x) + ret <vscale x 4 x bfloat> %a +} + +define <vscale x 8 x bfloat> @rint_nxv8bf16(<vscale x 8 x bfloat> %x) { +; CHECK-LABEL: rint_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v8, v12 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli 
zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %a = call <vscale x 8 x bfloat> @llvm.rint.nxv8bf16(<vscale x 8 x bfloat> %x) + ret <vscale x 8 x bfloat> %a +} + +define <vscale x 16 x bfloat> @rint_nxv16bf16(<vscale x 16 x bfloat> %x) { +; CHECK-LABEL: rint_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v16 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %a = call <vscale x 16 x bfloat> @llvm.rint.nxv16bf16(<vscale x 16 x bfloat> %x) + ret <vscale x 16 x bfloat> %a +} + +define <vscale x 32 x bfloat> @rint_nxv32bf16(<vscale x 32 x bfloat> %x) { +; CHECK-LABEL: rint_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vsetvli 
zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: ret + %a = call <vscale x 32 x bfloat> @llvm.rint.nxv32bf16(<vscale x 32 x bfloat> %x) + ret <vscale x 32 x bfloat> %a +} + +define <vscale x 1 x half> @rint_nxv1f16(<vscale x 1 x half> %x) { +; ZVFH-LABEL: rint_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: rint_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %a = call <vscale x 1 x half> @llvm.rint.nxv1f16(<vscale x 1 x half> %x) ret <vscale x 1 x half> %a } declare <vscale x 1 x half> @llvm.rint.nxv1f16(<vscale x 1 x half>) define <vscale x 2 x half> @rint_nxv2f16(<vscale x 2 x half> %x) { -; CHECK-LABEL: rint_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfabs.v 
v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: rint_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: rint_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %a = call <vscale x 2 x half> @llvm.rint.nxv2f16(<vscale x 2 x half> %x) ret <vscale x 2 x half> %a } declare <vscale x 2 x half> @llvm.rint.nxv2f16(<vscale x 2 x half>) define <vscale x 4 x half> @rint_nxv4f16(<vscale x 4 x half> %x) { -; CHECK-LABEL: rint_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: rint_nxv4f16: +; ZVFH: # 
%bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: rint_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v10 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %a = call <vscale x 4 x half> @llvm.rint.nxv4f16(<vscale x 4 x half> %x) ret <vscale x 4 x half> %a } declare <vscale x 4 x half> @llvm.rint.nxv4f16(<vscale x 4 x half>) define <vscale x 8 x half> @rint_nxv8f16(<vscale x 8 x half> %x) { -; CHECK-LABEL: rint_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: rint_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v10, 
v10, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: rint_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v12 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %a = call <vscale x 8 x half> @llvm.rint.nxv8f16(<vscale x 8 x half> %x) ret <vscale x 8 x half> %a } declare <vscale x 8 x half> @llvm.rint.nxv8f16(<vscale x 8 x half>) define <vscale x 16 x half> @rint_nxv16f16(<vscale x 16 x half> %x) { -; CHECK-LABEL: rint_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: rint_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: rint_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v16 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %a = call <vscale x 16 x half> @llvm.rint.nxv16f16(<vscale x 16 x half> %x) ret <vscale x 16 x half> %a } declare <vscale x 16 x half> @llvm.rint.nxv16f16(<vscale x 16 x half>) define <vscale x 32 x half> @rint_nxv32f16(<vscale x 32 x half> %x) { -; CHECK-LABEL: rint_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: rint_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: rint_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v16 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, 
v16, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v24 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 +; ZVFHMIN-NEXT: ret %a = call <vscale x 32 x half> @llvm.rint.nxv32f16(<vscale x 32 x half> %x) ret <vscale x 32 x half> %a } @@ -205,8 +465,8 @@ declare <vscale x 16 x float> @llvm.rint.nxv16f32(<vscale x 16 x float>) define <vscale x 1 x double> @rint_nxv1f64(<vscale x 1 x double> %x) { ; CHECK-LABEL: rint_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -223,8 +483,8 @@ declare <vscale x 1 x double> @llvm.rint.nxv1f64(<vscale x 1 x double>) define <vscale x 2 x double> @rint_nxv2f64(<vscale x 2 x double> %x) { ; CHECK-LABEL: rint_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -241,8 +501,8 @@ declare <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double>) define <vscale 
x 4 x double> @rint_nxv4f64(<vscale x 4 x double> %x) { ; CHECK-LABEL: rint_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -259,8 +519,8 @@ declare <vscale x 4 x double> @llvm.rint.nxv4f64(<vscale x 4 x double>) define <vscale x 8 x double> @rint_nxv8f64(<vscale x 8 x double> %x) { ; CHECK-LABEL: rint_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI20_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll index bb6724eeb320..193773b0c89c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll @@ -1,126 +1,410 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: 
--check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN ; This file tests the code generation for `llvm.round.*` on scalable vector type. -define <vscale x 1 x half> @round_nxv1f16(<vscale x 1 x half> %x) { -; CHECK-LABEL: round_nxv1f16: +define <vscale x 1 x bfloat> @round_nxv1bf16(<vscale x 1 x bfloat> %x) { +; CHECK-LABEL: round_nxv1bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret - %a = call <vscale x 1 x half> @llvm.round.nxv1f16(<vscale x 1 x half> %x) - ret <vscale x 1 x half> %a + %a = call <vscale x 1 x bfloat> @llvm.round.nxv1bf16(<vscale x 1 x bfloat> %x) + ret <vscale x 1 x bfloat> %a } -declare <vscale x 1 x half> @llvm.round.nxv1f16(<vscale x 1 x half>) -define <vscale x 2 x half> @round_nxv2f16(<vscale x 2 x half> %x) { -; CHECK-LABEL: round_nxv2f16: +define <vscale x 2 x bfloat> @round_nxv2bf16(<vscale x 2 x bfloat> %x) { +; CHECK-LABEL: round_nxv2bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; 
CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret - %a = call <vscale x 2 x half> @llvm.round.nxv2f16(<vscale x 2 x half> %x) - ret <vscale x 2 x half> %a + %a = call <vscale x 2 x bfloat> @llvm.round.nxv2bf16(<vscale x 2 x bfloat> %x) + ret <vscale x 2 x bfloat> %a } -declare <vscale x 2 x half> @llvm.round.nxv2f16(<vscale x 2 x half>) -define <vscale x 4 x half> @round_nxv4f16(<vscale x 4 x half> %x) { -; CHECK-LABEL: round_nxv4f16: +define <vscale x 4 x bfloat> @round_nxv4bf16(<vscale x 4 x bfloat> %x) { +; CHECK-LABEL: round_nxv4bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v8, v10 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, 
zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 ; CHECK-NEXT: ret - %a = call <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half> %x) - ret <vscale x 4 x half> %a + %a = call <vscale x 4 x bfloat> @llvm.round.nxv4bf16(<vscale x 4 x bfloat> %x) + ret <vscale x 4 x bfloat> %a } -declare <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half>) -define <vscale x 8 x half> @round_nxv8f16(<vscale x 8 x half> %x) { -; CHECK-LABEL: round_nxv8f16: +define <vscale x 8 x bfloat> @round_nxv8bf16(<vscale x 8 x bfloat> %x) { +; CHECK-LABEL: round_nxv8bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v8, v12 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret - %a = call <vscale x 8 x half> @llvm.round.nxv8f16(<vscale x 8 x half> %x) - ret <vscale x 8 x half> %a + %a = call <vscale x 8 x bfloat> @llvm.round.nxv8bf16(<vscale x 8 x bfloat> %x) + ret <vscale x 8 x bfloat> %a } -declare <vscale x 8 x half> 
@llvm.round.nxv8f16(<vscale x 8 x half>) -define <vscale x 16 x half> @round_nxv16f16(<vscale x 16 x half> %x) { -; CHECK-LABEL: round_nxv16f16: +define <vscale x 16 x bfloat> @round_nxv16bf16(<vscale x 16 x bfloat> %x) { +; CHECK-LABEL: round_nxv16bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v16 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 ; CHECK-NEXT: ret - %a = call <vscale x 16 x half> @llvm.round.nxv16f16(<vscale x 16 x half> %x) - ret <vscale x 16 x half> %a + %a = call <vscale x 16 x bfloat> @llvm.round.nxv16bf16(<vscale x 16 x bfloat> %x) + ret <vscale x 16 x bfloat> %a } -declare <vscale x 16 x half> @llvm.round.nxv16f16(<vscale x 16 x half>) -define <vscale x 32 x half> @round_nxv32f16(<vscale x 32 x half> %x) { -; CHECK-LABEL: round_nxv32f16: +define <vscale x 32 x bfloat> @round_nxv32bf16(<vscale x 32 x bfloat> %x) { +; CHECK-LABEL: round_nxv32bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: 
vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 ; CHECK-NEXT: ret + %a = call <vscale x 32 x bfloat> @llvm.round.nxv32bf16(<vscale x 32 x bfloat> %x) + ret <vscale x 32 x bfloat> %a +} + +define <vscale x 1 x half> @round_nxv1f16(<vscale x 1 x half> %x) { +; ZVFH-LABEL: round_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: round_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 1 x half> @llvm.round.nxv1f16(<vscale x 1 x half> %x) + ret <vscale x 1 x half> %a +} +declare <vscale x 1 x half> @llvm.round.nxv1f16(<vscale x 1 x half>) + +define <vscale x 2 x half> @round_nxv2f16(<vscale x 2 x half> %x) { +; ZVFH-LABEL: round_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: round_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 2 x half> 
@llvm.round.nxv2f16(<vscale x 2 x half> %x) + ret <vscale x 2 x half> %a +} +declare <vscale x 2 x half> @llvm.round.nxv2f16(<vscale x 2 x half>) + +define <vscale x 4 x half> @round_nxv4f16(<vscale x 4 x half> %x) { +; ZVFH-LABEL: round_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: round_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v10 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half> %x) + ret <vscale x 4 x half> %a +} +declare <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half>) + +define <vscale x 8 x half> @round_nxv8f16(<vscale x 8 x half> %x) { +; ZVFH-LABEL: round_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; 
ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: round_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v12 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 8 x half> @llvm.round.nxv8f16(<vscale x 8 x half> %x) + ret <vscale x 8 x half> %a +} +declare <vscale x 8 x half> @llvm.round.nxv8f16(<vscale x 8 x half>) + +define <vscale x 16 x half> @round_nxv16f16(<vscale x 16 x half> %x) { +; ZVFH-LABEL: round_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: round_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v16 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t 
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 16 x half> @llvm.round.nxv16f16(<vscale x 16 x half> %x) + ret <vscale x 16 x half> %a +} +declare <vscale x 16 x half> @llvm.round.nxv16f16(<vscale x 16 x half>) + +define <vscale x 32 x half> @round_nxv32f16(<vscale x 32 x half> %x) { +; ZVFH-LABEL: round_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: round_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v16 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v16 +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: 
vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: ret %a = call <vscale x 32 x half> @llvm.round.nxv32f16(<vscale x 32 x half> %x) ret <vscale x 32 x half> %a } @@ -229,8 +513,8 @@ declare <vscale x 16 x float> @llvm.round.nxv16f32(<vscale x 16 x float>) define <vscale x 1 x double> @round_nxv1f64(<vscale x 1 x double> %x) { ; CHECK-LABEL: round_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -249,8 +533,8 @@ declare <vscale x 1 x double> @llvm.round.nxv1f64(<vscale x 1 x double>) define <vscale x 2 x double> @round_nxv2f64(<vscale x 2 x double> %x) { ; CHECK-LABEL: round_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -269,8 +553,8 @@ declare <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double>) define <vscale x 4 x double> @round_nxv4f64(<vscale x 4 x double> %x) { ; CHECK-LABEL: round_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -289,8 +573,8 @@ declare <vscale x 4 x double> @llvm.round.nxv4f64(<vscale x 4 x double>) define <vscale x 8 x double> @round_nxv8f64(<vscale x 8 x double> %x) { ; CHECK-LABEL: round_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, 
%lo(.LCPI14_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI20_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll index 6f5207a25518..052ee2d3a43c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll @@ -1,126 +1,409 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN ; This file tests the code generation for `llvm.roundeven.*` on scalable vector type. 
- -define <vscale x 1 x half> @roundeven_nxv1f16(<vscale x 1 x half> %x) { -; CHECK-LABEL: roundeven_nxv1f16: +define <vscale x 1 x bfloat> @roundeven_nxv1bf16(<vscale x 1 x bfloat> %x) { +; CHECK-LABEL: roundeven_nxv1bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret - %a = call <vscale x 1 x half> @llvm.roundeven.nxv1f16(<vscale x 1 x half> %x) - ret <vscale x 1 x half> %a + %a = call <vscale x 1 x bfloat> @llvm.roundeven.nxv1bf16(<vscale x 1 x bfloat> %x) + ret <vscale x 1 x bfloat> %a } -declare <vscale x 1 x half> @llvm.roundeven.nxv1f16(<vscale x 1 x half>) -define <vscale x 2 x half> @roundeven_nxv2f16(<vscale x 2 x half> %x) { -; CHECK-LABEL: roundeven_nxv2f16: +define <vscale x 2 x bfloat> @roundeven_nxv2bf16(<vscale x 2 x bfloat> %x) { +; CHECK-LABEL: roundeven_nxv2bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: 
vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret - %a = call <vscale x 2 x half> @llvm.roundeven.nxv2f16(<vscale x 2 x half> %x) - ret <vscale x 2 x half> %a + %a = call <vscale x 2 x bfloat> @llvm.roundeven.nxv2bf16(<vscale x 2 x bfloat> %x) + ret <vscale x 2 x bfloat> %a } -declare <vscale x 2 x half> @llvm.roundeven.nxv2f16(<vscale x 2 x half>) -define <vscale x 4 x half> @roundeven_nxv4f16(<vscale x 4 x half> %x) { -; CHECK-LABEL: roundeven_nxv4f16: +define <vscale x 4 x bfloat> @roundeven_nxv4bf16(<vscale x 4 x bfloat> %x) { +; CHECK-LABEL: roundeven_nxv4bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v8, v10 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; CHECK-NEXT: vsetvli zero, 
zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 ; CHECK-NEXT: ret - %a = call <vscale x 4 x half> @llvm.roundeven.nxv4f16(<vscale x 4 x half> %x) - ret <vscale x 4 x half> %a + %a = call <vscale x 4 x bfloat> @llvm.roundeven.nxv4bf16(<vscale x 4 x bfloat> %x) + ret <vscale x 4 x bfloat> %a } -declare <vscale x 4 x half> @llvm.roundeven.nxv4f16(<vscale x 4 x half>) -define <vscale x 8 x half> @roundeven_nxv8f16(<vscale x 8 x half> %x) { -; CHECK-LABEL: roundeven_nxv8f16: +define <vscale x 8 x bfloat> @roundeven_nxv8bf16(<vscale x 8 x bfloat> %x) { +; CHECK-LABEL: roundeven_nxv8bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v8, v12 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret - %a = call <vscale x 8 x half> @llvm.roundeven.nxv8f16(<vscale x 8 x half> %x) - ret <vscale x 8 x half> %a + %a = call <vscale x 8 x bfloat> @llvm.roundeven.nxv8bf16(<vscale x 8 x bfloat> %x) + ret <vscale x 8 x bfloat> %a } -declare <vscale x 8 x half> @llvm.roundeven.nxv8f16(<vscale x 8 x half>) -define <vscale x 16 x half> @roundeven_nxv16f16(<vscale x 16 x half> %x) { -; CHECK-LABEL: roundeven_nxv16f16: +define <vscale x 16 x bfloat> 
@roundeven_nxv16bf16(<vscale x 16 x bfloat> %x) { +; CHECK-LABEL: roundeven_nxv16bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v16 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 ; CHECK-NEXT: ret - %a = call <vscale x 16 x half> @llvm.roundeven.nxv16f16(<vscale x 16 x half> %x) - ret <vscale x 16 x half> %a + %a = call <vscale x 16 x bfloat> @llvm.roundeven.nxv16bf16(<vscale x 16 x bfloat> %x) + ret <vscale x 16 x bfloat> %a } -declare <vscale x 16 x half> @llvm.roundeven.nxv16f16(<vscale x 16 x half>) -define <vscale x 32 x half> @roundeven_nxv32f16(<vscale x 32 x half> %x) { -; CHECK-LABEL: roundeven_nxv32f16: +define <vscale x 32 x bfloat> @roundeven_nxv32bf16(<vscale x 32 x bfloat> %x) { +; CHECK-LABEL: roundeven_nxv32bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: lui a0, 307200 +; 
CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 ; CHECK-NEXT: ret + %a = call <vscale x 32 x bfloat> @llvm.roundeven.nxv32bf16(<vscale x 32 x bfloat> %x) + ret <vscale x 32 x bfloat> %a +} + +define <vscale x 1 x half> @roundeven_nxv1f16(<vscale x 1 x half> %x) { +; ZVFH-LABEL: roundeven_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: roundeven_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 
307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 1 x half> @llvm.roundeven.nxv1f16(<vscale x 1 x half> %x) + ret <vscale x 1 x half> %a +} +declare <vscale x 1 x half> @llvm.roundeven.nxv1f16(<vscale x 1 x half>) + +define <vscale x 2 x half> @roundeven_nxv2f16(<vscale x 2 x half> %x) { +; ZVFH-LABEL: roundeven_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: roundeven_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 2 x half> @llvm.roundeven.nxv2f16(<vscale x 2 x half> %x) + ret <vscale x 2 x half> %a +} +declare <vscale x 2 x half> 
@llvm.roundeven.nxv2f16(<vscale x 2 x half>) + +define <vscale x 4 x half> @roundeven_nxv4f16(<vscale x 4 x half> %x) { +; ZVFH-LABEL: roundeven_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: roundeven_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v10 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 4 x half> @llvm.roundeven.nxv4f16(<vscale x 4 x half> %x) + ret <vscale x 4 x half> %a +} +declare <vscale x 4 x half> @llvm.roundeven.nxv4f16(<vscale x 4 x half>) + +define <vscale x 8 x half> @roundeven_nxv8f16(<vscale x 8 x half> %x) { +; ZVFH-LABEL: roundeven_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, 
v10, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: roundeven_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v12 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 8 x half> @llvm.roundeven.nxv8f16(<vscale x 8 x half> %x) + ret <vscale x 8 x half> %a +} +declare <vscale x 8 x half> @llvm.roundeven.nxv8f16(<vscale x 8 x half>) + +define <vscale x 16 x half> @roundeven_nxv16f16(<vscale x 16 x half> %x) { +; ZVFH-LABEL: roundeven_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: roundeven_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v16 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, 
mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret + %a = call <vscale x 16 x half> @llvm.roundeven.nxv16f16(<vscale x 16 x half> %x) + ret <vscale x 16 x half> %a +} +declare <vscale x 16 x half> @llvm.roundeven.nxv16f16(<vscale x 16 x half>) + +define <vscale x 32 x half> @roundeven_nxv32f16(<vscale x 32 x half> %x) { +; ZVFH-LABEL: roundeven_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: roundeven_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v16 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v16 +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t 
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: ret %a = call <vscale x 32 x half> @llvm.roundeven.nxv32f16(<vscale x 32 x half> %x) ret <vscale x 32 x half> %a } @@ -229,8 +512,8 @@ declare <vscale x 16 x float> @llvm.roundeven.nxv16f32(<vscale x 16 x float>) define <vscale x 1 x double> @roundeven_nxv1f64(<vscale x 1 x double> %x) { ; CHECK-LABEL: roundeven_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -249,8 +532,8 @@ declare <vscale x 1 x double> @llvm.roundeven.nxv1f64(<vscale x 1 x double>) define <vscale x 2 x double> @roundeven_nxv2f64(<vscale x 2 x double> %x) { ; CHECK-LABEL: roundeven_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -269,8 +552,8 @@ declare <vscale x 2 x double> @llvm.roundeven.nxv2f64(<vscale x 2 x double>) define <vscale x 4 x double> @roundeven_nxv4f64(<vscale x 4 x double> %x) { ; CHECK-LABEL: roundeven_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -289,8 +572,8 @@ declare <vscale x 4 x double> @llvm.roundeven.nxv4f64(<vscale x 4 x double>) define <vscale x 8 x double> @roundeven_nxv8f64(<vscale x 8 x double> %x) { ; CHECK-LABEL: roundeven_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; 
CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI20_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll index 8841232e7f76..b29b24a9ce7b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll @@ -1,112 +1,372 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN -define <vscale x 1 x half> @trunc_nxv1f16(<vscale x 1 x half> %x) { -; CHECK-LABEL: trunc_nxv1f16: +define <vscale x 1 x bfloat> @trunc_nxv1bf16(<vscale x 1 x bfloat> %x) { +; CHECK-LABEL: trunc_nxv1bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: 
vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %a = call <vscale x 1 x bfloat> @llvm.trunc.nxv1bf16(<vscale x 1 x bfloat> %x) + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 2 x bfloat> @trunc_nxv2bf16(<vscale x 2 x bfloat> %x) { +; CHECK-LABEL: trunc_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret + %a = call <vscale x 2 x bfloat> @llvm.trunc.nxv2bf16(<vscale x 2 x bfloat> %x) + ret <vscale x 2 x bfloat> %a +} + +define <vscale x 4 x bfloat> @trunc_nxv4bf16(<vscale x 4 x bfloat> %x) { +; CHECK-LABEL: trunc_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v8, v10 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: 
vfcvt.rtz.x.f.v v8, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %a = call <vscale x 4 x bfloat> @llvm.trunc.nxv4bf16(<vscale x 4 x bfloat> %x) + ret <vscale x 4 x bfloat> %a +} + +define <vscale x 8 x bfloat> @trunc_nxv8bf16(<vscale x 8 x bfloat> %x) { +; CHECK-LABEL: trunc_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v8, v12 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %a = call <vscale x 8 x bfloat> @llvm.trunc.nxv8bf16(<vscale x 8 x bfloat> %x) + ret <vscale x 8 x bfloat> %a +} + +define <vscale x 16 x bfloat> @trunc_nxv16bf16(<vscale x 16 x bfloat> %x) { +; CHECK-LABEL: trunc_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v16 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %a = call <vscale x 16 x bfloat> @llvm.trunc.nxv16bf16(<vscale x 16 x bfloat> %x) + ret <vscale x 16 x bfloat> %a +} + 
+define <vscale x 32 x bfloat> @trunc_nxv32bf16(<vscale x 32 x bfloat> %x) { +; CHECK-LABEL: trunc_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: ret + %a = call <vscale x 32 x bfloat> @llvm.trunc.nxv32bf16(<vscale x 32 x bfloat> %x) + ret <vscale x 32 x bfloat> %a +} + +define <vscale x 1 x half> @trunc_nxv1f16(<vscale x 1 x half> %x) { +; ZVFH-LABEL: trunc_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: trunc_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; 
ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %a = call <vscale x 1 x half> @llvm.trunc.nxv1f16(<vscale x 1 x half> %x) ret <vscale x 1 x half> %a } declare <vscale x 1 x half> @llvm.trunc.nxv1f16(<vscale x 1 x half>) define <vscale x 2 x half> @trunc_nxv2f16(<vscale x 2 x half> %x) { -; CHECK-LABEL: trunc_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: trunc_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: trunc_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: 
vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %a = call <vscale x 2 x half> @llvm.trunc.nxv2f16(<vscale x 2 x half> %x) ret <vscale x 2 x half> %a } declare <vscale x 2 x half> @llvm.trunc.nxv2f16(<vscale x 2 x half>) define <vscale x 4 x half> @trunc_nxv4f16(<vscale x 4 x half> %x) { -; CHECK-LABEL: trunc_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: trunc_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: trunc_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v10 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %a = call <vscale x 4 
x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> %x) ret <vscale x 4 x half> %a } declare <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half>) define <vscale x 8 x half> @trunc_nxv8f16(<vscale x 8 x half> %x) { -; CHECK-LABEL: trunc_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: trunc_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: trunc_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v12 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %a = call <vscale x 8 x half> @llvm.trunc.nxv8f16(<vscale x 8 x half> %x) ret <vscale x 8 x half> %a } declare <vscale x 8 x half> @llvm.trunc.nxv8f16(<vscale x 8 x half>) define <vscale x 16 x half> @trunc_nxv16f16(<vscale x 16 x half> %x) { -; CHECK-LABEL: 
trunc_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: trunc_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: trunc_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v16 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %a = call <vscale x 16 x half> @llvm.trunc.nxv16f16(<vscale x 16 x half> %x) ret <vscale x 16 x half> %a } declare <vscale x 16 x half> @llvm.trunc.nxv16f16(<vscale x 16 x half>) define <vscale x 32 x half> @trunc_nxv32f16(<vscale x 32 x half> %x) { -; CHECK-LABEL: trunc_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, 
fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: trunc_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: trunc_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v16 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v24 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v16, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 +; ZVFHMIN-NEXT: ret %a = call <vscale x 32 x half> @llvm.trunc.nxv32f16(<vscale x 32 x half> %x) ret <vscale x 32 x half> %a } @@ -205,8 +465,8 @@ declare <vscale x 16 x float> 
@llvm.trunc.nxv16f32(<vscale x 16 x float>) define <vscale x 1 x double> @trunc_nxv1f64(<vscale x 1 x double> %x) { ; CHECK-LABEL: trunc_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -223,8 +483,8 @@ declare <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double>) define <vscale x 2 x double> @trunc_nxv2f64(<vscale x 2 x double> %x) { ; CHECK-LABEL: trunc_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -241,8 +501,8 @@ declare <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double>) define <vscale x 4 x double> @trunc_nxv4f64(<vscale x 4 x double> %x) { ; CHECK-LABEL: trunc_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -259,8 +519,8 @@ declare <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double>) define <vscale x 8 x double> @trunc_nxv8f64(<vscale x 8 x double> %x) { ; CHECK-LABEL: trunc_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: lui a0, %hi(.LCPI20_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll 
b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll index 2a915529e61d..9d0cb22eb5f4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll @@ -24,8 +24,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) { ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v9 -; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v10, 1 -; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v10, 0 ; RV32-BITS-UNKNOWN-NEXT: ret ; ; RV32-BITS-256-LABEL: reverse_nxv2i1: @@ -39,8 +38,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) { ; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: vrsub.vx v9, v9, a0 ; RV32-BITS-256-NEXT: vrgather.vv v10, v8, v9 -; RV32-BITS-256-NEXT: vand.vi v8, v10, 1 -; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-256-NEXT: vmsne.vi v0, v10, 0 ; RV32-BITS-256-NEXT: ret ; ; RV32-BITS-512-LABEL: reverse_nxv2i1: @@ -54,8 +52,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) { ; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: vrsub.vx v9, v9, a0 ; RV32-BITS-512-NEXT: vrgather.vv v10, v8, v9 -; RV32-BITS-512-NEXT: vand.vi v8, v10, 1 -; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-512-NEXT: vmsne.vi v0, v10, 0 ; RV32-BITS-512-NEXT: ret ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv2i1: @@ -71,8 +68,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) { ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v9 -; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v10, 1 -; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v10, 0 ; RV64-BITS-UNKNOWN-NEXT: ret ; ; RV64-BITS-256-LABEL: reverse_nxv2i1: @@ -86,8 +82,7 @@ define <vscale x 2 x i1> 
@reverse_nxv2i1(<vscale x 2 x i1> %a) { ; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9 -; RV64-BITS-256-NEXT: vand.vi v8, v10, 1 -; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-256-NEXT: vmsne.vi v0, v10, 0 ; RV64-BITS-256-NEXT: ret ; ; RV64-BITS-512-LABEL: reverse_nxv2i1: @@ -101,8 +96,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) { ; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9 -; RV64-BITS-512-NEXT: vand.vi v8, v10, 1 -; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-512-NEXT: vmsne.vi v0, v10, 0 ; RV64-BITS-512-NEXT: ret %res = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> %a) ret <vscale x 2 x i1> %res @@ -122,8 +116,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) { ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v9 -; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v10, 1 -; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v10, 0 ; RV32-BITS-UNKNOWN-NEXT: ret ; ; RV32-BITS-256-LABEL: reverse_nxv4i1: @@ -137,8 +130,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) { ; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: vrsub.vx v9, v9, a0 ; RV32-BITS-256-NEXT: vrgather.vv v10, v8, v9 -; RV32-BITS-256-NEXT: vand.vi v8, v10, 1 -; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-256-NEXT: vmsne.vi v0, v10, 0 ; RV32-BITS-256-NEXT: ret ; ; RV32-BITS-512-LABEL: reverse_nxv4i1: @@ -152,8 +144,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) { ; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: vrsub.vx v9, v9, a0 ; RV32-BITS-512-NEXT: vrgather.vv v10, v8, v9 -; RV32-BITS-512-NEXT: vand.vi v8, v10, 1 -; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-512-NEXT: vmsne.vi v0, v10, 
0 ; RV32-BITS-512-NEXT: ret ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv4i1: @@ -169,8 +160,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) { ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v9 -; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v10, 1 -; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v10, 0 ; RV64-BITS-UNKNOWN-NEXT: ret ; ; RV64-BITS-256-LABEL: reverse_nxv4i1: @@ -184,8 +174,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) { ; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9 -; RV64-BITS-256-NEXT: vand.vi v8, v10, 1 -; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-256-NEXT: vmsne.vi v0, v10, 0 ; RV64-BITS-256-NEXT: ret ; ; RV64-BITS-512-LABEL: reverse_nxv4i1: @@ -199,8 +188,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) { ; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9 -; RV64-BITS-512-NEXT: vand.vi v8, v10, 1 -; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-512-NEXT: vmsne.vi v0, v10, 0 ; RV64-BITS-512-NEXT: ret %res = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> %a) ret <vscale x 4 x i1> %res @@ -219,8 +207,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) { ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10 -; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v9, 1 -; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v9, 0 ; RV32-BITS-UNKNOWN-NEXT: ret ; ; RV32-BITS-256-LABEL: reverse_nxv8i1: @@ -233,8 +220,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) { ; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: vrsub.vx v9, 
v9, a0 ; RV32-BITS-256-NEXT: vrgather.vv v10, v8, v9 -; RV32-BITS-256-NEXT: vand.vi v8, v10, 1 -; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-256-NEXT: vmsne.vi v0, v10, 0 ; RV32-BITS-256-NEXT: ret ; ; RV32-BITS-512-LABEL: reverse_nxv8i1: @@ -247,8 +233,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) { ; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: vrsub.vx v9, v9, a0 ; RV32-BITS-512-NEXT: vrgather.vv v10, v8, v9 -; RV32-BITS-512-NEXT: vand.vi v8, v10, 1 -; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-512-NEXT: vmsne.vi v0, v10, 0 ; RV32-BITS-512-NEXT: ret ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv8i1: @@ -263,8 +248,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) { ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10 -; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v9, 1 -; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v9, 0 ; RV64-BITS-UNKNOWN-NEXT: ret ; ; RV64-BITS-256-LABEL: reverse_nxv8i1: @@ -277,8 +261,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) { ; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9 -; RV64-BITS-256-NEXT: vand.vi v8, v10, 1 -; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-256-NEXT: vmsne.vi v0, v10, 0 ; RV64-BITS-256-NEXT: ret ; ; RV64-BITS-512-LABEL: reverse_nxv8i1: @@ -291,8 +274,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) { ; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9 -; RV64-BITS-512-NEXT: vand.vi v8, v10, 1 -; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-512-NEXT: vmsne.vi v0, v10, 0 ; RV64-BITS-512-NEXT: ret %res = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> %a) ret <vscale x 8 x i1> %res @@ -313,8 +295,7 @@ define 
<vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) { ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v13, v10, v8 ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v11, v8 ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v12, 1 -; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v12, 0 ; RV32-BITS-UNKNOWN-NEXT: ret ; ; RV32-BITS-256-LABEL: reverse_nxv16i1: @@ -331,8 +312,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) { ; RV32-BITS-256-NEXT: vrgather.vv v13, v10, v8 ; RV32-BITS-256-NEXT: vrgather.vv v12, v11, v8 ; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; RV32-BITS-256-NEXT: vand.vi v8, v12, 1 -; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-256-NEXT: vmsne.vi v0, v12, 0 ; RV32-BITS-256-NEXT: ret ; ; RV32-BITS-512-LABEL: reverse_nxv16i1: @@ -349,8 +329,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) { ; RV32-BITS-512-NEXT: vrgather.vv v13, v10, v8 ; RV32-BITS-512-NEXT: vrgather.vv v12, v11, v8 ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; RV32-BITS-512-NEXT: vand.vi v8, v12, 1 -; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-512-NEXT: vmsne.vi v0, v12, 0 ; RV32-BITS-512-NEXT: ret ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv16i1: @@ -367,8 +346,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) { ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v13, v10, v8 ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v11, v8 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v12, 1 -; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v12, 0 ; RV64-BITS-UNKNOWN-NEXT: ret ; ; RV64-BITS-256-LABEL: reverse_nxv16i1: @@ -385,8 +363,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) { ; RV64-BITS-256-NEXT: vrgather.vv v13, v10, v8 ; RV64-BITS-256-NEXT: vrgather.vv v12, v11, v8 ; RV64-BITS-256-NEXT: vsetvli a0, zero, 
e8, m2, ta, ma -; RV64-BITS-256-NEXT: vand.vi v8, v12, 1 -; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-256-NEXT: vmsne.vi v0, v12, 0 ; RV64-BITS-256-NEXT: ret ; ; RV64-BITS-512-LABEL: reverse_nxv16i1: @@ -403,8 +380,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) { ; RV64-BITS-512-NEXT: vrgather.vv v13, v10, v8 ; RV64-BITS-512-NEXT: vrgather.vv v12, v11, v8 ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; RV64-BITS-512-NEXT: vand.vi v8, v12, 1 -; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-512-NEXT: vmsne.vi v0, v12, 0 ; RV64-BITS-512-NEXT: ret %res = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> %a) ret <vscale x 16 x i1> %res @@ -427,7 +403,6 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) { ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v18, v12 ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v8, v19, v12 ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 ; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 ; RV32-BITS-UNKNOWN-NEXT: ret ; @@ -447,7 +422,6 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) { ; RV32-BITS-256-NEXT: vrgather.vv v9, v18, v12 ; RV32-BITS-256-NEXT: vrgather.vv v8, v19, v12 ; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV32-BITS-256-NEXT: vand.vi v8, v8, 1 ; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0 ; RV32-BITS-256-NEXT: ret ; @@ -467,7 +441,6 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) { ; RV32-BITS-512-NEXT: vrgather.vv v9, v18, v12 ; RV32-BITS-512-NEXT: vrgather.vv v8, v19, v12 ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV32-BITS-512-NEXT: vand.vi v8, v8, 1 ; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 ; RV32-BITS-512-NEXT: ret ; @@ -487,7 +460,6 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) { ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v18, v12 ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v8, v19, v12 ; 
RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 ; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 ; RV64-BITS-UNKNOWN-NEXT: ret ; @@ -507,7 +479,6 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) { ; RV64-BITS-256-NEXT: vrgather.vv v9, v18, v12 ; RV64-BITS-256-NEXT: vrgather.vv v8, v19, v12 ; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV64-BITS-256-NEXT: vand.vi v8, v8, 1 ; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0 ; RV64-BITS-256-NEXT: ret ; @@ -527,7 +498,6 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) { ; RV64-BITS-512-NEXT: vrgather.vv v9, v18, v12 ; RV64-BITS-512-NEXT: vrgather.vv v8, v19, v12 ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV64-BITS-512-NEXT: vand.vi v8, v8, 1 ; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 ; RV64-BITS-512-NEXT: ret %res = call <vscale x 32 x i1> @llvm.vector.reverse.nxv32i1(<vscale x 32 x i1> %a) @@ -555,7 +525,6 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) { ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v30, v16 ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v8, v31, v16 ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 ; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 ; RV32-BITS-UNKNOWN-NEXT: ret ; @@ -579,7 +548,6 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) { ; RV32-BITS-256-NEXT: vrgather.vv v9, v22, v24 ; RV32-BITS-256-NEXT: vrgather.vv v8, v23, v24 ; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV32-BITS-256-NEXT: vand.vi v8, v8, 1 ; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0 ; RV32-BITS-256-NEXT: ret ; @@ -603,7 +571,6 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) { ; RV32-BITS-512-NEXT: vrgather.vv v9, v22, v24 ; RV32-BITS-512-NEXT: vrgather.vv v8, v23, v24 ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV32-BITS-512-NEXT: vand.vi v8, v8, 1 ; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 
; RV32-BITS-512-NEXT: ret ; @@ -627,7 +594,6 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) { ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v30, v16 ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v8, v31, v16 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 ; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 ; RV64-BITS-UNKNOWN-NEXT: ret ; @@ -651,7 +617,6 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) { ; RV64-BITS-256-NEXT: vrgather.vv v9, v22, v24 ; RV64-BITS-256-NEXT: vrgather.vv v8, v23, v24 ; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV64-BITS-256-NEXT: vand.vi v8, v8, 1 ; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0 ; RV64-BITS-256-NEXT: ret ; @@ -675,7 +640,6 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) { ; RV64-BITS-512-NEXT: vrgather.vv v9, v22, v24 ; RV64-BITS-512-NEXT: vrgather.vv v8, v23, v24 ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV64-BITS-512-NEXT: vand.vi v8, v8, 1 ; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 ; RV64-BITS-512-NEXT: ret %res = call <vscale x 64 x i1> @llvm.vector.reverse.nxv64i1(<vscale x 64 x i1> %a) diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll index a3ea462b6a73..5aa773b01e69 100644 --- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll @@ -1,20 +1,420 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc 
-mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +declare <vscale x 1 x bfloat> @llvm.vp.nearbyint.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x bfloat> @vp_nearbyint_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_nearbyint_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v9, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: ret + %v = 
call <vscale x 1 x bfloat> @llvm.vp.nearbyint.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vp_nearbyint_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_nearbyint_nxv1bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.nearbyint.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +declare <vscale x 2 x bfloat> @llvm.vp.nearbyint.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x bfloat> @vp_nearbyint_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_nearbyint_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v9, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, 
mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.nearbyint.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vp_nearbyint_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_nearbyint_nxv2bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.nearbyint.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +declare <vscale x 4 x bfloat> @llvm.vp.nearbyint.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x bfloat> @vp_nearbyint_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_nearbyint_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v12, v10, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmflt.vf v9, v12, fa5, v0.t +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t +; 
CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.nearbyint.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vp_nearbyint_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_nearbyint_nxv4bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v8, v10 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.nearbyint.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +declare <vscale x 8 x bfloat> @llvm.vp.nearbyint.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x bfloat> @vp_nearbyint_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_nearbyint_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v16, v12, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: 
vmflt.vf v10, v16, fa5, v0.t +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.nearbyint.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vp_nearbyint_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_nearbyint_nxv8bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v8, v12 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.nearbyint.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +declare <vscale x 16 x bfloat> @llvm.vp.nearbyint.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x bfloat> @vp_nearbyint_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_nearbyint_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli 
zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.nearbyint.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vp_nearbyint_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_nearbyint_nxv16bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v16 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.nearbyint.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +declare <vscale x 32 x bfloat> @llvm.vp.nearbyint.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x bfloat> @vp_nearbyint_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { 
+; CHECK-LABEL: vp_nearbyint_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v17, v0, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t +; CHECK-NEXT: lui a2, 307200 +; CHECK-NEXT: fmv.w.x fa5, a2 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t +; CHECK-NEXT: frflags a2 +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: fsflags a2 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: bltu a0, a1, .LBB10_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v16, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t +; 
CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.nearbyint.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vp_nearbyint_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_nearbyint_nxv32bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v16, v16, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t +; CHECK-NEXT: lui a2, 307200 +; CHECK-NEXT: fmv.w.x fa5, a2 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v16, v8, 
fa5, v0.t +; CHECK-NEXT: frflags a2 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: fsflags a2 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: bltu a0, a1, .LBB11_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.nearbyint.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} declare <vscale x 1 x half> @llvm.vp.nearbyint.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32) define <vscale x 1 x half> @vp_nearbyint_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI12_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -55,8 +455,8 @@ define <vscale x 
1 x half> @vp_nearbyint_nxv1f16(<vscale x 1 x half> %va, <vscal define <vscale x 1 x half> @vp_nearbyint_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv1f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI13_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 @@ -95,8 +495,8 @@ declare <vscale x 2 x half> @llvm.vp.nearbyint.nxv2f16(<vscale x 2 x half>, <vsc define <vscale x 2 x half> @vp_nearbyint_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI14_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu @@ -137,8 +537,8 @@ define <vscale x 2 x half> @vp_nearbyint_nxv2f16(<vscale x 2 x half> %va, <vscal define <vscale x 2 x half> @vp_nearbyint_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI15_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 @@ -177,8 +577,8 @@ declare <vscale x 4 x half> @llvm.vp.nearbyint.nxv4f16(<vscale x 4 x half>, <vsc define <vscale x 4 x half> @vp_nearbyint_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI16_0) +; ZVFH-NEXT: flh fa5, 
%lo(.LCPI16_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -221,8 +621,8 @@ define <vscale x 4 x half> @vp_nearbyint_nxv4f16(<vscale x 4 x half> %va, <vscal define <vscale x 4 x half> @vp_nearbyint_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI17_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 @@ -261,8 +661,8 @@ declare <vscale x 8 x half> @llvm.vp.nearbyint.nxv8f16(<vscale x 8 x half>, <vsc define <vscale x 8 x half> @vp_nearbyint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI18_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1) ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t @@ -307,8 +707,8 @@ define <vscale x 8 x half> @vp_nearbyint_nxv8f16(<vscale x 8 x half> %va, <vscal define <vscale x 8 x half> @vp_nearbyint_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 @@ -347,8 +747,8 @@ declare <vscale x 16 x half> @llvm.vp.nearbyint.nxv16f16(<vscale x 16 x half>, < define <vscale x 16 x half> @vp_nearbyint_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: 
vp_nearbyint_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI8_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI20_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1) ; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t @@ -393,8 +793,8 @@ define <vscale x 16 x half> @vp_nearbyint_nxv16f16(<vscale x 16 x half> %va, <vs define <vscale x 16 x half> @vp_nearbyint_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI9_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI21_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 @@ -433,8 +833,8 @@ declare <vscale x 32 x half> @llvm.vp.nearbyint.nxv32f16(<vscale x 32 x half>, < define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI10_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI22_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1) ; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v24, v8, v0.t @@ -489,10 +889,10 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vs ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB10_2: +; ZVFHMIN-NEXT: .LBB22_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 @@ -523,8 +923,8 @@ define <vscale x 32 x half> 
@vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vs define <vscale x 32 x half> @vp_nearbyint_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI11_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5 @@ -576,10 +976,10 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16_unmasked(<vscale x 32 x half> ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB11_2: +; ZVFHMIN-NEXT: .LBB23_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 @@ -824,8 +1224,8 @@ declare <vscale x 1 x double> @llvm.vp.nearbyint.nxv1f64(<vscale x 1 x double>, define <vscale x 1 x double> @vp_nearbyint_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI22_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI34_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -845,8 +1245,8 @@ define <vscale x 1 x double> @vp_nearbyint_nxv1f64(<vscale x 1 x double> %va, <v define <vscale x 1 x double> @vp_nearbyint_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv1f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI23_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: lui a1, 
%hi(.LCPI35_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -866,8 +1266,8 @@ declare <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double>, define <vscale x 2 x double> @vp_nearbyint_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI24_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI36_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1) ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t @@ -889,8 +1289,8 @@ define <vscale x 2 x double> @vp_nearbyint_nxv2f64(<vscale x 2 x double> %va, <v define <vscale x 2 x double> @vp_nearbyint_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv2f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI25_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI37_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -910,8 +1310,8 @@ declare <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double>, define <vscale x 4 x double> @vp_nearbyint_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI38_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1) ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t @@ -933,8 +1333,8 @@ define <vscale x 4 x double> @vp_nearbyint_nxv4f64(<vscale x 4 x double> %va, <v define <vscale x 4 x double> @vp_nearbyint_nxv4f64_unmasked(<vscale x 4 x 
double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv4f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI27_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI39_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -954,8 +1354,8 @@ declare <vscale x 7 x double> @llvm.vp.nearbyint.nxv7f64(<vscale x 7 x double>, define <vscale x 7 x double> @vp_nearbyint_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI28_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI40_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1) ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -977,8 +1377,8 @@ define <vscale x 7 x double> @vp_nearbyint_nxv7f64(<vscale x 7 x double> %va, <v define <vscale x 7 x double> @vp_nearbyint_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv7f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI29_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI41_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -998,8 +1398,8 @@ declare <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double>, define <vscale x 8 x double> @vp_nearbyint_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI30_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI42_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1) ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; 
CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -1021,8 +1421,8 @@ define <vscale x 8 x double> @vp_nearbyint_nxv8f64(<vscale x 8 x double> %va, <v define <vscale x 8 x double> @vp_nearbyint_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI31_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI43_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -1049,8 +1449,8 @@ define <vscale x 16 x double> @vp_nearbyint_nxv16f64(<vscale x 16 x double> %va, ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v6, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: lui a3, %hi(.LCPI32_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) +; CHECK-NEXT: lui a3, %hi(.LCPI44_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 @@ -1067,10 +1467,10 @@ define <vscale x 16 x double> @vp_nearbyint_nxv16f64(<vscale x 16 x double> %va, ; CHECK-NEXT: fsflags a2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: .LBB44_2: ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -1094,8 +1494,8 @@ define <vscale x 16 x double> @vp_nearbyint_nxv16f64_unmasked(<vscale x 16 x dou ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: lui a3, %hi(.LCPI33_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI33_0)(a3) +; CHECK-NEXT: lui a3, %hi(.LCPI45_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 @@ 
-1108,10 +1508,10 @@ define <vscale x 16 x double> @vp_nearbyint_nxv16f64_unmasked(<vscale x 16 x dou ; CHECK-NEXT: fsflags a2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-NEXT: bltu a0, a1, .LBB45_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll index 88bd92c6ec16..a454f9dd97ce 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll @@ -1,20 +1,397 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; 
RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +declare <vscale x 1 x bfloat> @llvm.vp.rint.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x bfloat> @vp_rint_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_rint_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v9, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.rint.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vp_rint_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_rint_nxv1bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; 
CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.rint.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +declare <vscale x 2 x bfloat> @llvm.vp.rint.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x bfloat> @vp_rint_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_rint_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v9, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.rint.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vp_rint_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_rint_nxv2bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, 
mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.rint.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +declare <vscale x 4 x bfloat> @llvm.vp.rint.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x bfloat> @vp_rint_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_rint_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v12, v10, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmflt.vf v9, v12, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.rint.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vp_rint_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_rint_nxv4bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v8, v10 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t 
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.rint.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +declare <vscale x 8 x bfloat> @llvm.vp.rint.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x bfloat> @vp_rint_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_rint_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v16, v12, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmflt.vf v10, v16, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.rint.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vp_rint_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_rint_nxv8bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v8, v12 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; 
CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.rint.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +declare <vscale x 16 x bfloat> @llvm.vp.rint.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x bfloat> @vp_rint_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_rint_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.rint.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vp_rint_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_rint_nxv16bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v16 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, 
v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.rint.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +declare <vscale x 32 x bfloat> @llvm.vp.rint.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x bfloat> @vp_rint_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_rint_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v17, v0, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t +; CHECK-NEXT: lui a2, 307200 +; CHECK-NEXT: fmv.w.x fa5, a2 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, 
v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: bltu a0, a1, .LBB10_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmv1r.v v8, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v16, v24, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.rint.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vp_rint_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_rint_nxv32bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 
+; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v16, v16, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t +; CHECK-NEXT: lui a2, 307200 +; CHECK-NEXT: fmv.w.x fa5, a2 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v16, v8, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: bltu a0, a1, .LBB11_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.rint.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} 
declare <vscale x 1 x half> @llvm.vp.rint.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32) define <vscale x 1 x half> @vp_rint_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI12_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -51,8 +428,8 @@ define <vscale x 1 x half> @vp_rint_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 define <vscale x 1 x half> @vp_rint_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv1f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI13_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 @@ -87,8 +464,8 @@ declare <vscale x 2 x half> @llvm.vp.rint.nxv2f16(<vscale x 2 x half>, <vscale x define <vscale x 2 x half> @vp_rint_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI14_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu @@ -125,8 +502,8 @@ define <vscale x 2 x half> @vp_rint_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 define <vscale x 2 x half> @vp_rint_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI15_0) +; ZVFH-NEXT: flh 
fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 @@ -161,8 +538,8 @@ declare <vscale x 4 x half> @llvm.vp.rint.nxv4f16(<vscale x 4 x half>, <vscale x define <vscale x 4 x half> @vp_rint_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI16_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -201,8 +578,8 @@ define <vscale x 4 x half> @vp_rint_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 define <vscale x 4 x half> @vp_rint_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI17_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 @@ -237,8 +614,8 @@ declare <vscale x 8 x half> @llvm.vp.rint.nxv8f16(<vscale x 8 x half>, <vscale x define <vscale x 8 x half> @vp_rint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI18_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1) ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t @@ -279,8 +656,8 @@ define <vscale x 8 x half> @vp_rint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 define <vscale x 8 x half> @vp_rint_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: 
lui a1, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 @@ -315,8 +692,8 @@ declare <vscale x 16 x half> @llvm.vp.rint.nxv16f16(<vscale x 16 x half>, <vscal define <vscale x 16 x half> @vp_rint_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI8_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI20_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1) ; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t @@ -357,8 +734,8 @@ define <vscale x 16 x half> @vp_rint_nxv16f16(<vscale x 16 x half> %va, <vscale define <vscale x 16 x half> @vp_rint_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI9_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI21_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 @@ -393,8 +770,8 @@ declare <vscale x 32 x half> @llvm.vp.rint.nxv32f16(<vscale x 32 x half>, <vscal define <vscale x 32 x half> @vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI10_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI22_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1) ; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v24, v8, v0.t @@ -445,10 +822,10 @@ define <vscale x 32 x half> @vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t ; 
ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB10_2: +; ZVFHMIN-NEXT: .LBB22_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 @@ -482,8 +859,8 @@ define <vscale x 32 x half> @vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale define <vscale x 32 x half> @vp_rint_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI11_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5 @@ -531,10 +908,10 @@ define <vscale x 32 x half> @vp_rint_nxv32f16_unmasked(<vscale x 32 x half> %va, ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB11_2: +; ZVFHMIN-NEXT: .LBB23_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 @@ -757,8 +1134,8 @@ declare <vscale x 1 x double> @llvm.vp.rint.nxv1f64(<vscale x 1 x double>, <vsca define <vscale x 1 x double> @vp_rint_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI22_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI34_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; 
CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -776,8 +1153,8 @@ define <vscale x 1 x double> @vp_rint_nxv1f64(<vscale x 1 x double> %va, <vscale define <vscale x 1 x double> @vp_rint_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv1f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI23_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI35_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -795,8 +1172,8 @@ declare <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double>, <vsca define <vscale x 2 x double> @vp_rint_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI24_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI36_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1) ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t @@ -816,8 +1193,8 @@ define <vscale x 2 x double> @vp_rint_nxv2f64(<vscale x 2 x double> %va, <vscale define <vscale x 2 x double> @vp_rint_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv2f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI25_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI37_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -835,8 +1212,8 @@ declare <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double>, <vsca define <vscale x 4 x double> @vp_rint_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, 
%lo(.LCPI26_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI38_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1) ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t @@ -856,8 +1233,8 @@ define <vscale x 4 x double> @vp_rint_nxv4f64(<vscale x 4 x double> %va, <vscale define <vscale x 4 x double> @vp_rint_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv4f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI27_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI39_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -875,8 +1252,8 @@ declare <vscale x 7 x double> @llvm.vp.rint.nxv7f64(<vscale x 7 x double>, <vsca define <vscale x 7 x double> @vp_rint_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI28_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI40_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1) ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -896,8 +1273,8 @@ define <vscale x 7 x double> @vp_rint_nxv7f64(<vscale x 7 x double> %va, <vscale define <vscale x 7 x double> @vp_rint_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv7f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI29_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI41_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -915,8 +1292,8 @@ declare <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double>, <vsca define <vscale x 8 x double> @vp_rint_nxv8f64(<vscale x 8 x 
double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI30_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI42_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1) ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -936,8 +1313,8 @@ define <vscale x 8 x double> @vp_rint_nxv8f64(<vscale x 8 x double> %va, <vscale define <vscale x 8 x double> @vp_rint_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI31_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI43_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -968,8 +1345,8 @@ define <vscale x 16 x double> @vp_rint_nxv16f64(<vscale x 16 x double> %va, <vsc ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v6, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: lui a3, %hi(.LCPI32_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) +; CHECK-NEXT: lui a3, %hi(.LCPI44_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 @@ -987,10 +1364,10 @@ define <vscale x 16 x double> @vp_rint_nxv16f64(<vscale x 16 x double> %va, <vsc ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: .LBB44_2: ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -1016,8 +1393,8 @@ define <vscale x 16 x double> @vp_rint_nxv16f64_unmasked(<vscale 
x 16 x double> ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: lui a3, %hi(.LCPI33_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI33_0)(a3) +; CHECK-NEXT: lui a3, %hi(.LCPI45_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 @@ -1028,10 +1405,10 @@ define <vscale x 16 x double> @vp_rint_nxv16f64_unmasked(<vscale x 16 x double> ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-NEXT: bltu a0, a1, .LBB45_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll index 1ddadcc49373..a4936483e8a1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll @@ -1,20 +1,428 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: 
-target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +declare <vscale x 1 x bfloat> @llvm.vp.round.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x bfloat> @vp_round_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_round_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v9, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.round.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vp_round_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_round_nxv1bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, 
zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.round.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +declare <vscale x 2 x bfloat> @llvm.vp.round.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x bfloat> @vp_round_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_round_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v9, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.round.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vp_round_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: 
vp_round_nxv2bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.round.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +declare <vscale x 4 x bfloat> @llvm.vp.round.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x bfloat> @vp_round_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_round_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v12, v10, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmflt.vf v9, v12, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.round.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 
x bfloat> %v +} + +define <vscale x 4 x bfloat> @vp_round_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_round_nxv4bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v8, v10 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.round.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +declare <vscale x 8 x bfloat> @llvm.vp.round.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x bfloat> @vp_round_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_round_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v16, v12, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmflt.vf v10, v16, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; 
CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.round.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vp_round_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_round_nxv8bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v8, v12 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.round.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +declare <vscale x 16 x bfloat> @llvm.vp.round.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x bfloat> @vp_round_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_round_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, 
zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.round.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vp_round_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_round_nxv16bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v16 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.round.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +declare <vscale x 32 x bfloat> @llvm.vp.round.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x bfloat> @vp_round_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_round_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi 
a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v17, v0, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t +; CHECK-NEXT: lui a2, 307200 +; CHECK-NEXT: fmv.w.x fa5, a2 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t +; CHECK-NEXT: fsrmi a2, 4 +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t +; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: bltu a0, a1, .LBB10_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0 +; CHECK-NEXT: vmv1r.v v8, v16 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v16, v24, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v16, 
(a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.round.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vp_round_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_round_nxv32bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v16, v16, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t +; CHECK-NEXT: lui a2, 307200 +; CHECK-NEXT: fmv.w.x fa5, a2 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v16, v8, fa5, v0.t +; CHECK-NEXT: fsrmi a2, 4 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, 
v24, v0.t +; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: bltu a0, a1, .LBB11_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.round.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} declare <vscale x 1 x half> @llvm.vp.round.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32) define <vscale x 1 x half> @vp_round_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI12_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -55,8 +463,8 @@ define <vscale x 1 x half> @vp_round_nxv1f16(<vscale x 1 x half> %va, <vscale x define <vscale x 1 x half> @vp_round_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) { ; 
ZVFH-LABEL: vp_round_nxv1f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI13_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 @@ -95,8 +503,8 @@ declare <vscale x 2 x half> @llvm.vp.round.nxv2f16(<vscale x 2 x half>, <vscale define <vscale x 2 x half> @vp_round_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI14_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu @@ -137,8 +545,8 @@ define <vscale x 2 x half> @vp_round_nxv2f16(<vscale x 2 x half> %va, <vscale x define <vscale x 2 x half> @vp_round_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI15_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 @@ -177,8 +585,8 @@ declare <vscale x 4 x half> @llvm.vp.round.nxv4f16(<vscale x 4 x half>, <vscale define <vscale x 4 x half> @vp_round_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI16_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -221,8 +629,8 @@ define <vscale x 4 x half> 
@vp_round_nxv4f16(<vscale x 4 x half> %va, <vscale x define <vscale x 4 x half> @vp_round_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI17_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 @@ -261,8 +669,8 @@ declare <vscale x 8 x half> @llvm.vp.round.nxv8f16(<vscale x 8 x half>, <vscale define <vscale x 8 x half> @vp_round_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI18_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1) ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t @@ -307,8 +715,8 @@ define <vscale x 8 x half> @vp_round_nxv8f16(<vscale x 8 x half> %va, <vscale x define <vscale x 8 x half> @vp_round_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 @@ -347,8 +755,8 @@ declare <vscale x 16 x half> @llvm.vp.round.nxv16f16(<vscale x 16 x half>, <vsca define <vscale x 16 x half> @vp_round_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI8_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI20_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1) ; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli 
zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t @@ -393,8 +801,8 @@ define <vscale x 16 x half> @vp_round_nxv16f16(<vscale x 16 x half> %va, <vscale define <vscale x 16 x half> @vp_round_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI9_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI21_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 @@ -433,8 +841,8 @@ declare <vscale x 32 x half> @llvm.vp.round.nxv32f16(<vscale x 32 x half>, <vsca define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI10_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI22_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1) ; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v24, v8, v0.t @@ -489,10 +897,10 @@ define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB10_2: +; ZVFHMIN-NEXT: .LBB22_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 @@ -531,8 +939,8 @@ define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale define <vscale x 32 x half> @vp_round_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI11_0) -; ZVFH-NEXT: flh fa5, 
%lo(.LCPI11_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5 @@ -584,10 +992,10 @@ define <vscale x 32 x half> @vp_round_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB11_2: +; ZVFHMIN-NEXT: .LBB23_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 @@ -832,8 +1240,8 @@ declare <vscale x 1 x double> @llvm.vp.round.nxv1f64(<vscale x 1 x double>, <vsc define <vscale x 1 x double> @vp_round_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI22_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI34_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -853,8 +1261,8 @@ define <vscale x 1 x double> @vp_round_nxv1f64(<vscale x 1 x double> %va, <vscal define <vscale x 1 x double> @vp_round_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv1f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI23_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI35_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -874,8 +1282,8 @@ declare <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double>, <vsc define <vscale x 2 x double> 
@vp_round_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI24_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI36_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1) ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t @@ -897,8 +1305,8 @@ define <vscale x 2 x double> @vp_round_nxv2f64(<vscale x 2 x double> %va, <vscal define <vscale x 2 x double> @vp_round_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv2f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI25_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI37_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -918,8 +1326,8 @@ declare <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double>, <vsc define <vscale x 4 x double> @vp_round_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI38_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1) ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t @@ -941,8 +1349,8 @@ define <vscale x 4 x double> @vp_round_nxv4f64(<vscale x 4 x double> %va, <vscal define <vscale x 4 x double> @vp_round_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv4f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI27_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI39_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v 
v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -962,8 +1370,8 @@ declare <vscale x 7 x double> @llvm.vp.round.nxv7f64(<vscale x 7 x double>, <vsc define <vscale x 7 x double> @vp_round_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI28_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI40_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1) ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -985,8 +1393,8 @@ define <vscale x 7 x double> @vp_round_nxv7f64(<vscale x 7 x double> %va, <vscal define <vscale x 7 x double> @vp_round_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv7f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI29_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI41_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -1006,8 +1414,8 @@ declare <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double>, <vsc define <vscale x 8 x double> @vp_round_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI30_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI42_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1) ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -1029,8 +1437,8 @@ define <vscale x 8 x double> @vp_round_nxv8f64(<vscale x 8 x double> %va, <vscal define <vscale x 8 x double> @vp_round_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI31_0) -; CHECK-NEXT: fld 
fa5, %lo(.LCPI31_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI43_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -1063,8 +1471,8 @@ define <vscale x 16 x double> @vp_round_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v6, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: lui a3, %hi(.LCPI32_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) +; CHECK-NEXT: lui a3, %hi(.LCPI44_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 @@ -1085,10 +1493,10 @@ define <vscale x 16 x double> @vp_round_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: .LBB44_2: ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -1116,8 +1524,8 @@ define <vscale x 16 x double> @vp_round_nxv16f64_unmasked(<vscale x 16 x double> ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: lui a3, %hi(.LCPI33_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI33_0)(a3) +; CHECK-NEXT: lui a3, %hi(.LCPI45_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 @@ -1130,10 +1538,10 @@ define <vscale x 16 x double> @vp_round_nxv16f64_unmasked(<vscale x 16 x double> ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-NEXT: bltu a0, a1, .LBB45_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv 
a0, a1 -; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll index 8c5a7bb2dea6..9857009002eb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll @@ -1,20 +1,428 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +declare <vscale x 1 x 
bfloat> @llvm.vp.roundeven.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x bfloat> @vp_roundeven_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundeven_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v9, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.roundeven.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vp_roundeven_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundeven_nxv1bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.roundeven.nxv1bf16(<vscale x 1 x bfloat> %va, 
<vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +declare <vscale x 2 x bfloat> @llvm.vp.roundeven.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x bfloat> @vp_roundeven_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundeven_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v9, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.roundeven.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vp_roundeven_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundeven_nxv2bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; 
CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.roundeven.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +declare <vscale x 4 x bfloat> @llvm.vp.roundeven.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x bfloat> @vp_roundeven_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundeven_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v12, v10, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmflt.vf v9, v12, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.roundeven.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vp_roundeven_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundeven_nxv4bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v8, v10 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: 
vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.roundeven.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +declare <vscale x 8 x bfloat> @llvm.vp.roundeven.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x bfloat> @vp_roundeven_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundeven_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v16, v12, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmflt.vf v10, v16, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.roundeven.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vp_roundeven_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundeven_nxv8bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v8, v12 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; 
CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.roundeven.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +declare <vscale x 16 x bfloat> @llvm.vp.roundeven.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x bfloat> @vp_roundeven_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundeven_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.roundeven.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vp_roundeven_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundeven_nxv16bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, 
m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v16 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.roundeven.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +declare <vscale x 32 x bfloat> @llvm.vp.roundeven.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x bfloat> @vp_roundeven_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundeven_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v17, v0, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t +; CHECK-NEXT: lui a2, 307200 +; 
CHECK-NEXT: fmv.w.x fa5, a2 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t +; CHECK-NEXT: fsrmi a2, 0 +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t +; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: bltu a0, a1, .LBB10_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0 +; CHECK-NEXT: vmv1r.v v8, v16 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v16, v24, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.roundeven.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 
%evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vp_roundeven_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundeven_nxv32bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v16, v16, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t +; CHECK-NEXT: lui a2, 307200 +; CHECK-NEXT: fmv.w.x fa5, a2 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v16, v8, fa5, v0.t +; CHECK-NEXT: fsrmi a2, 0 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t +; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: bltu a0, a1, .LBB11_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; 
CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.roundeven.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} declare <vscale x 1 x half> @llvm.vp.roundeven.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32) define <vscale x 1 x half> @vp_roundeven_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI12_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -55,8 +463,8 @@ define <vscale x 1 x half> @vp_roundeven_nxv1f16(<vscale x 1 x half> %va, <vscal define <vscale x 1 x half> @vp_roundeven_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv1f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI13_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 @@ -95,8 +503,8 @@ declare <vscale x 2 x half> @llvm.vp.roundeven.nxv2f16(<vscale x 2 x half>, <vsc define <vscale x 2 x half> @vp_roundeven_nxv2f16(<vscale x 2 x 
half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI14_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu @@ -137,8 +545,8 @@ define <vscale x 2 x half> @vp_roundeven_nxv2f16(<vscale x 2 x half> %va, <vscal define <vscale x 2 x half> @vp_roundeven_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI15_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 @@ -177,8 +585,8 @@ declare <vscale x 4 x half> @llvm.vp.roundeven.nxv4f16(<vscale x 4 x half>, <vsc define <vscale x 4 x half> @vp_roundeven_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI16_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -221,8 +629,8 @@ define <vscale x 4 x half> @vp_roundeven_nxv4f16(<vscale x 4 x half> %va, <vscal define <vscale x 4 x half> @vp_roundeven_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI17_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: 
vmflt.vf v0, v9, fa5 @@ -261,8 +669,8 @@ declare <vscale x 8 x half> @llvm.vp.roundeven.nxv8f16(<vscale x 8 x half>, <vsc define <vscale x 8 x half> @vp_roundeven_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI18_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1) ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t @@ -307,8 +715,8 @@ define <vscale x 8 x half> @vp_roundeven_nxv8f16(<vscale x 8 x half> %va, <vscal define <vscale x 8 x half> @vp_roundeven_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 @@ -347,8 +755,8 @@ declare <vscale x 16 x half> @llvm.vp.roundeven.nxv16f16(<vscale x 16 x half>, < define <vscale x 16 x half> @vp_roundeven_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI8_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI20_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1) ; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t @@ -393,8 +801,8 @@ define <vscale x 16 x half> @vp_roundeven_nxv16f16(<vscale x 16 x half> %va, <vs define <vscale x 16 x half> @vp_roundeven_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI9_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a1) +; ZVFH-NEXT: lui 
a1, %hi(.LCPI21_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 @@ -433,8 +841,8 @@ declare <vscale x 32 x half> @llvm.vp.roundeven.nxv32f16(<vscale x 32 x half>, < define <vscale x 32 x half> @vp_roundeven_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI10_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI22_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1) ; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v24, v8, v0.t @@ -489,10 +897,10 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16(<vscale x 32 x half> %va, <vs ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB10_2: +; ZVFHMIN-NEXT: .LBB22_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 @@ -531,8 +939,8 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16(<vscale x 32 x half> %va, <vs define <vscale x 32 x half> @vp_roundeven_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI11_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5 @@ -584,10 +992,10 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16_unmasked(<vscale x 32 x half> ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, 
ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB11_2: +; ZVFHMIN-NEXT: .LBB23_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 @@ -832,8 +1240,8 @@ declare <vscale x 1 x double> @llvm.vp.roundeven.nxv1f64(<vscale x 1 x double>, define <vscale x 1 x double> @vp_roundeven_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI22_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI34_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -853,8 +1261,8 @@ define <vscale x 1 x double> @vp_roundeven_nxv1f64(<vscale x 1 x double> %va, <v define <vscale x 1 x double> @vp_roundeven_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv1f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI23_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI35_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -874,8 +1282,8 @@ declare <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double>, define <vscale x 2 x double> @vp_roundeven_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI24_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI36_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1) ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; 
CHECK-NEXT: vfabs.v v12, v8, v0.t @@ -897,8 +1305,8 @@ define <vscale x 2 x double> @vp_roundeven_nxv2f64(<vscale x 2 x double> %va, <v define <vscale x 2 x double> @vp_roundeven_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv2f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI25_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI37_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -918,8 +1326,8 @@ declare <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double>, define <vscale x 4 x double> @vp_roundeven_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI38_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1) ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t @@ -941,8 +1349,8 @@ define <vscale x 4 x double> @vp_roundeven_nxv4f64(<vscale x 4 x double> %va, <v define <vscale x 4 x double> @vp_roundeven_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv4f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI27_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI39_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -962,8 +1370,8 @@ declare <vscale x 7 x double> @llvm.vp.roundeven.nxv7f64(<vscale x 7 x double>, define <vscale x 7 x double> @vp_roundeven_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI28_0) 
-; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI40_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1) ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -985,8 +1393,8 @@ define <vscale x 7 x double> @vp_roundeven_nxv7f64(<vscale x 7 x double> %va, <v define <vscale x 7 x double> @vp_roundeven_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv7f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI29_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI41_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -1006,8 +1414,8 @@ declare <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double>, define <vscale x 8 x double> @vp_roundeven_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI30_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI42_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1) ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -1029,8 +1437,8 @@ define <vscale x 8 x double> @vp_roundeven_nxv8f64(<vscale x 8 x double> %va, <v define <vscale x 8 x double> @vp_roundeven_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI31_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI43_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -1063,8 +1471,8 @@ define <vscale x 16 x double> @vp_roundeven_nxv16f64(<vscale x 16 x double> %va, ; 
CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v6, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: lui a3, %hi(.LCPI32_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) +; CHECK-NEXT: lui a3, %hi(.LCPI44_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 @@ -1085,10 +1493,10 @@ define <vscale x 16 x double> @vp_roundeven_nxv16f64(<vscale x 16 x double> %va, ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: .LBB44_2: ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -1116,8 +1524,8 @@ define <vscale x 16 x double> @vp_roundeven_nxv16f64_unmasked(<vscale x 16 x dou ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: lui a3, %hi(.LCPI33_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI33_0)(a3) +; CHECK-NEXT: lui a3, %hi(.LCPI45_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 @@ -1130,10 +1538,10 @@ define <vscale x 16 x double> @vp_roundeven_nxv16f64_unmasked(<vscale x 16 x dou ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-NEXT: bltu a0, a1, .LBB45_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll index 1227e73a0243..11830c924867 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll @@ -1,20 +1,428 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +declare <vscale x 1 x bfloat> @llvm.vp.roundtozero.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x bfloat> @vp_roundtozero_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundtozero_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, 
ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v9, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.roundtozero.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vp_roundtozero_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundtozero_nxv1bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.roundtozero.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +declare <vscale x 2 x bfloat> @llvm.vp.roundtozero.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x bfloat> @vp_roundtozero_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 
zeroext %evl) { +; CHECK-LABEL: vp_roundtozero_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v9, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.roundtozero.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vp_roundtozero_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundtozero_nxv2bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v9 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.roundtozero.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +declare <vscale x 4 x bfloat> @llvm.vp.roundtozero.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, 
i32) + +define <vscale x 4 x bfloat> @vp_roundtozero_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundtozero_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v12, v10, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmflt.vf v9, v12, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.roundtozero.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vp_roundtozero_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundtozero_nxv4bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfabs.v v8, v10 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.roundtozero.nxv4bf16(<vscale x 4 x bfloat> 
%va, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +declare <vscale x 8 x bfloat> @llvm.vp.roundtozero.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x bfloat> @vp_roundtozero_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundtozero_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v16, v12, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmflt.vf v10, v16, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.roundtozero.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vp_roundtozero_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundtozero_nxv8bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v8, v12 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; 
CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.roundtozero.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +declare <vscale x 16 x bfloat> @llvm.vp.roundtozero.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x bfloat> @vp_roundtozero_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundtozero_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.roundtozero.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vp_roundtozero_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundtozero_nxv16bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v16 +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: fsrmi 
a0, 1 +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.roundtozero.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +declare <vscale x 32 x bfloat> @llvm.vp.roundtozero.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x bfloat> @vp_roundtozero_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundtozero_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v17, v0, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t +; CHECK-NEXT: lui a2, 307200 +; CHECK-NEXT: fmv.w.x fa5, a2 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t +; CHECK-NEXT: fsrmi a2, 1 +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: 
vfcvt.x.f.v v8, v24, v0.t +; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: bltu a0, a1, .LBB10_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0 +; CHECK-NEXT: vmv1r.v v8, v16 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v16, v24, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.roundtozero.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vp_roundtozero_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vp_roundtozero_nxv32bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: 
addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v16, v16, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t +; CHECK-NEXT: lui a2, 307200 +; CHECK-NEXT: fmv.w.x fa5, a2 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmflt.vf v16, v8, fa5, v0.t +; CHECK-NEXT: fsrmi a2, 1 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t +; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: bltu a0, a1, .LBB11_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vfcvt.f.x.v v24, 
v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.roundtozero.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} declare <vscale x 1 x half> @llvm.vp.roundtozero.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32) define <vscale x 1 x half> @vp_roundtozero_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI12_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -55,8 +463,8 @@ define <vscale x 1 x half> @vp_roundtozero_nxv1f16(<vscale x 1 x half> %va, <vsc define <vscale x 1 x half> @vp_roundtozero_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv1f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI13_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 @@ -95,8 +503,8 @@ declare <vscale x 2 x half> @llvm.vp.roundtozero.nxv2f16(<vscale x 2 x half>, <v define <vscale x 2 x half> @vp_roundtozero_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI14_0) +; 
ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu @@ -137,8 +545,8 @@ define <vscale x 2 x half> @vp_roundtozero_nxv2f16(<vscale x 2 x half> %va, <vsc define <vscale x 2 x half> @vp_roundtozero_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI15_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 @@ -177,8 +585,8 @@ declare <vscale x 4 x half> @llvm.vp.roundtozero.nxv4f16(<vscale x 4 x half>, <v define <vscale x 4 x half> @vp_roundtozero_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI16_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -221,8 +629,8 @@ define <vscale x 4 x half> @vp_roundtozero_nxv4f16(<vscale x 4 x half> %va, <vsc define <vscale x 4 x half> @vp_roundtozero_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI17_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 @@ -261,8 +669,8 @@ declare <vscale x 8 x half> @llvm.vp.roundtozero.nxv8f16(<vscale x 8 x half>, <v define <vscale x 8 x half> @vp_roundtozero_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> 
%m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI18_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1) ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t @@ -307,8 +715,8 @@ define <vscale x 8 x half> @vp_roundtozero_nxv8f16(<vscale x 8 x half> %va, <vsc define <vscale x 8 x half> @vp_roundtozero_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5 @@ -347,8 +755,8 @@ declare <vscale x 16 x half> @llvm.vp.roundtozero.nxv16f16(<vscale x 16 x half>, define <vscale x 16 x half> @vp_roundtozero_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI8_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI20_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1) ; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t @@ -393,8 +801,8 @@ define <vscale x 16 x half> @vp_roundtozero_nxv16f16(<vscale x 16 x half> %va, < define <vscale x 16 x half> @vp_roundtozero_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI9_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI21_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 @@ -433,8 +841,8 @@ declare 
<vscale x 32 x half> @llvm.vp.roundtozero.nxv32f16(<vscale x 32 x half>, define <vscale x 32 x half> @vp_roundtozero_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI10_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI22_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1) ; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v24, v8, v0.t @@ -489,10 +897,10 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16(<vscale x 32 x half> %va, < ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB10_2: +; ZVFHMIN-NEXT: .LBB22_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 @@ -531,8 +939,8 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16(<vscale x 32 x half> %va, < define <vscale x 32 x half> @vp_roundtozero_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI11_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a1) +; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5 @@ -584,10 +992,10 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16_unmasked(<vscale x 32 x hal ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: 
.LBB11_2: +; ZVFHMIN-NEXT: .LBB23_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 @@ -832,8 +1240,8 @@ declare <vscale x 1 x double> @llvm.vp.roundtozero.nxv1f64(<vscale x 1 x double> define <vscale x 1 x double> @vp_roundtozero_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI22_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI34_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -853,8 +1261,8 @@ define <vscale x 1 x double> @vp_roundtozero_nxv1f64(<vscale x 1 x double> %va, define <vscale x 1 x double> @vp_roundtozero_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv1f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI23_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI35_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -874,8 +1282,8 @@ declare <vscale x 2 x double> @llvm.vp.roundtozero.nxv2f64(<vscale x 2 x double> define <vscale x 2 x double> @vp_roundtozero_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI24_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI36_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1) ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t @@ -897,8 +1305,8 @@ define <vscale x 2 x double> @vp_roundtozero_nxv2f64(<vscale x 2 x double> %va, define <vscale x 2 x double> 
@vp_roundtozero_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv2f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI25_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI37_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -918,8 +1326,8 @@ declare <vscale x 4 x double> @llvm.vp.roundtozero.nxv4f64(<vscale x 4 x double> define <vscale x 4 x double> @vp_roundtozero_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI38_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1) ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t @@ -941,8 +1349,8 @@ define <vscale x 4 x double> @vp_roundtozero_nxv4f64(<vscale x 4 x double> %va, define <vscale x 4 x double> @vp_roundtozero_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv4f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI27_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI39_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -962,8 +1370,8 @@ declare <vscale x 7 x double> @llvm.vp.roundtozero.nxv7f64(<vscale x 7 x double> define <vscale x 7 x double> @vp_roundtozero_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI28_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI40_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1) ; CHECK-NEXT: vmv1r.v 
v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -985,8 +1393,8 @@ define <vscale x 7 x double> @vp_roundtozero_nxv7f64(<vscale x 7 x double> %va, define <vscale x 7 x double> @vp_roundtozero_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv7f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI29_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI41_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -1006,8 +1414,8 @@ declare <vscale x 8 x double> @llvm.vp.roundtozero.nxv8f64(<vscale x 8 x double> define <vscale x 8 x double> @vp_roundtozero_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI30_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI42_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1) ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -1029,8 +1437,8 @@ define <vscale x 8 x double> @vp_roundtozero_nxv8f64(<vscale x 8 x double> %va, define <vscale x 8 x double> @vp_roundtozero_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI31_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1) +; CHECK-NEXT: lui a1, %hi(.LCPI43_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -1063,8 +1471,8 @@ define <vscale x 16 x double> @vp_roundtozero_nxv16f64(<vscale x 16 x double> %v ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v6, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: lui a3, 
%hi(.LCPI32_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) +; CHECK-NEXT: lui a3, %hi(.LCPI44_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 @@ -1085,10 +1493,10 @@ define <vscale x 16 x double> @vp_roundtozero_nxv16f64(<vscale x 16 x double> %v ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: .LBB44_2: ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -1116,8 +1524,8 @@ define <vscale x 16 x double> @vp_roundtozero_nxv16f64_unmasked(<vscale x 16 x d ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: lui a3, %hi(.LCPI33_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI33_0)(a3) +; CHECK-NEXT: lui a3, %hi(.LCPI45_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 @@ -1130,10 +1538,10 @@ define <vscale x 16 x double> @vp_roundtozero_nxv16f64_unmasked(<vscale x 16 x d ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-NEXT: bltu a0, a1, .LBB45_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll index e9b6126323de..5ba4efa8458c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll @@ -1,12 +1,1664 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN + +declare <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, metadata, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i1> @fcmp_oeq_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_oeq_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v9, v10, v0.t +; CHECK-NEXT: ret + %v 
= call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"oeq", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_oeq_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_oeq_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v10, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"oeq", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_oeq_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_oeq_vf_swap_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v10, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"oeq", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_ogt_vv_nxv1bf16(<vscale x 1 x 
bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ogt_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ogt", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_ogt_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ogt_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ogt", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_ogt_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ogt_vf_swap_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat 
%b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"ogt", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_oge_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_oge_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"oge", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_oge_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_oge_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"oge", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_oge_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_oge_vf_swap_nxv1bf16: +; 
CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"oge", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_olt_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_olt_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v0, v9, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"olt", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_olt_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_olt_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> 
zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"olt", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_olt_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_olt_vf_swap_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"olt", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_ole_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ole_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfle.vv v0, v9, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ole", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_ole_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ole_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: 
vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ole", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_ole_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ole_vf_swap_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"ole", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_one_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_one_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v8, v9, v10, v0.t +; CHECK-NEXT: vmflt.vv v9, v10, v9, v0.t +; CHECK-NEXT: vmor.mm v0, v9, v8 +; CHECK-NEXT: ret + %v = call 
<vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"one", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_one_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_one_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v9, v10, v8, v0.t +; CHECK-NEXT: vmflt.vv v8, v8, v10, v0.t +; CHECK-NEXT: vmor.mm v0, v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"one", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_one_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_one_vf_swap_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v9, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v8, v10, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, 
metadata !"one", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_ord_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ord_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v9, v10, v10, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v10, v10, v0.t +; CHECK-NEXT: vmand.mm v0, v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ord", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_ord_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ord_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v10, v10, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v9, v10, v10, v0.t +; CHECK-NEXT: vmand.mm v0, v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ord", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_ord_vf_swap_nxv1bf16(<vscale 
x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ord_vf_swap_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v10, v10, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v9, v10, v10, v0.t +; CHECK-NEXT: vmand.mm v0, v9, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"ord", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_ueq_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ueq_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v8, v9, v10, v0.t +; CHECK-NEXT: vmflt.vv v9, v10, v9, v0.t +; CHECK-NEXT: vmnor.mm v0, v9, v8 +; CHECK-NEXT: ret + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ueq", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_ueq_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ueq_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v9, v10, v8, v0.t +; CHECK-NEXT: vmflt.vv v8, v8, v10, v0.t +; CHECK-NEXT: vmnor.mm v0, v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ueq", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_ueq_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ueq_vf_swap_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v9, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v8, v10, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"ueq", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_ugt_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ugt_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; 
CHECK-NEXT: vmfle.vv v8, v9, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ugt", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_ugt_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ugt_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfle.vv v8, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ugt", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_ugt_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ugt_vf_swap_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfle.vv v8, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> 
%va, metadata !"ugt", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_uge_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_uge_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v8, v9, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"uge", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_uge_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_uge_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v8, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"uge", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_uge_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_uge_vf_swap_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; 
CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v8, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"uge", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_ult_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ult_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfle.vv v8, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ult", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_ult_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ult_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfle.vv v8, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x 
bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ult", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_ult_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ult_vf_swap_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfle.vv v8, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"ult", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_ule_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ule_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v8, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ule", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_ule_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ule_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: 
vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v8, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ule", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_ule_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ule_vf_swap_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v8, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"ule", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_une_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_une_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfne.vv v0, v9, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale 
x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"une", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_une_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_une_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfne.vv v0, v10, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"une", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_une_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_une_vf_swap_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v10, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"une", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_uno_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, 
i32 zeroext %evl) { +; CHECK-LABEL: fcmp_uno_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfne.vv v9, v10, v10, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfne.vv v8, v10, v10, v0.t +; CHECK-NEXT: vmor.mm v0, v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"uno", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_uno_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_uno_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfne.vv v8, v10, v10, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfne.vv v9, v10, v10, v0.t +; CHECK-NEXT: vmor.mm v0, v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"uno", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +define <vscale x 1 x i1> @fcmp_uno_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_uno_vf_swap_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, 
mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfne.vv v8, v10, v10, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmfne.vv v9, v10, v10, v0.t +; CHECK-NEXT: vmor.mm v0, v9, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"uno", <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i1> %v +} + +declare <vscale x 3 x i1> @llvm.vp.fcmp.nxv3bf16(<vscale x 3 x bfloat>, <vscale x 3 x bfloat>, metadata, <vscale x 3 x i1>, i32) + +define <vscale x 3 x i1> @fcmp_oeq_vv_nxv3bf16(<vscale x 3 x bfloat> %va, <vscale x 3 x bfloat> %vb, <vscale x 3 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_oeq_vv_nxv3bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 3 x i1> @llvm.vp.fcmp.nxv3bf16(<vscale x 3 x bfloat> %va, <vscale x 3 x bfloat> %vb, metadata !"oeq", <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i1> %v +} + +declare <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, metadata, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_oeq_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v 
v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"oeq", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_oeq_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"oeq", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_oeq_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_oeq_vf_swap_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> 
zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"oeq", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ogt_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ogt", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ogt_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ogt", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_ogt_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ogt_vf_swap_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; 
CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"ogt", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_oge_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_oge_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"oge", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_oge_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_oge_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 
8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"oge", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_oge_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_oge_vf_swap_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"oge", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_olt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_olt_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"olt", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_olt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_olt_vf_nxv8bf16: +; CHECK: # %bb.0: 
+; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"olt", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_olt_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_olt_vf_swap_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"olt", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_ole_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ole_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, 
ta, ma +; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ole", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_ole_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ole_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ole", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_ole_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ole_vf_swap_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale 
x 8 x bfloat> %va, metadata !"ole", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_one_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_one_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmflt.vv v9, v12, v16, v0.t +; CHECK-NEXT: vmor.mm v0, v9, v8 +; CHECK-NEXT: ret + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"one", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_one_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_one_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmflt.vv v9, v16, v12, v0.t +; CHECK-NEXT: vmor.mm v0, v9, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"one", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_one_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_one_vf_swap_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: 
vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmflt.vv v9, v12, v16, v0.t +; CHECK-NEXT: vmor.mm v0, v9, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"one", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_ord_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ord_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v10, v12, v12, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v12, v12, v0.t +; CHECK-NEXT: vmand.mm v0, v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ord", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_ord_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ord_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v12, v12, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, 
ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v9, v12, v12, v0.t +; CHECK-NEXT: vmand.mm v0, v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ord", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_ord_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ord_vf_swap_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v12, v12, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v9, v12, v12, v0.t +; CHECK-NEXT: vmand.mm v0, v9, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"ord", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ueq_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, 
m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmflt.vv v9, v12, v16, v0.t +; CHECK-NEXT: vmnor.mm v0, v9, v8 +; CHECK-NEXT: ret + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ueq", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ueq_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmflt.vv v9, v16, v12, v0.t +; CHECK-NEXT: vmnor.mm v0, v9, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ueq", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_ueq_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ueq_vf_swap_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmflt.vv v9, v12, v16, v0.t +; CHECK-NEXT: vmnor.mm v0, v9, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, 
<vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"ueq", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ugt_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ugt_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_ugt_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ugt_vf_swap_nxv8bf16: +; CHECK: 
# %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_uge_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_uge_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_uge_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_uge_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector 
<vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_uge_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_uge_vf_swap_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_ult_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ult_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_ult_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; 
CHECK-LABEL: fcmp_ult_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_ult_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ult_vf_swap_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_ule_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ule_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: 
vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ule", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_ule_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ule_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ule", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_ule_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_ule_vf_swap_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale 
x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"ule", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_une_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_une_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"une", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_une_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_une_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"une", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_une_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_une_vf_swap_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, 
ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"une", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_uno_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_uno_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v10, v12, v12, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v8, v12, v12, v0.t +; CHECK-NEXT: vmor.mm v0, v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"uno", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_uno_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_uno_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v8, v12, v12, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, 
a0, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v9, v12, v12, v0.t +; CHECK-NEXT: vmor.mm v0, v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"uno", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +define <vscale x 8 x i1> @fcmp_uno_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_uno_vf_swap_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v8, v12, v12, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v9, v12, v12, v0.t +; CHECK-NEXT: vmor.mm v0, v9, v8 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"uno", <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i1> %v +} + +declare <vscale x 64 x i1> @llvm.vp.fcmp.nxv64bf16(<vscale x 64 x bfloat>, <vscale x 64 x bfloat>, metadata, <vscale x 64 x i1>, i32) + +define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vscale x 64 x bfloat> %vb, <vscale x 64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: fcmp_oeq_vv_nxv64bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb 
+; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb +; CHECK-NEXT: vmv8r.v v24, v16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a1, a3, 3 +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: vl8re16.v v16, (a1) +; CHECK-NEXT: slli a5, a3, 2 +; CHECK-NEXT: sub a1, a2, a5 +; CHECK-NEXT: sltu a4, a2, a1 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a6, a4, a1 +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: sub a1, a6, a4 +; CHECK-NEXT: sltu a7, a6, a1 +; CHECK-NEXT: addi a7, a7, -1 +; CHECK-NEXT: and a7, a7, a1 +; CHECK-NEXT: srli a1, a3, 1 +; CHECK-NEXT: csrr t0, vlenb +; CHECK-NEXT: add t0, sp, t0 +; CHECK-NEXT: addi t0, t0, 16 +; CHECK-NEXT: vs1r.v v0, (t0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli t0, zero, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v0, a1 +; CHECK-NEXT: srli a3, a3, 2 +; CHECK-NEXT: addi t0, sp, 16 +; CHECK-NEXT: vs1r.v v8, (t0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli t0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v8, a3 +; CHECK-NEXT: vl8re16.v v8, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv t0, a0 +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add t0, t0, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, t0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) 
# Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20 +; CHECK-NEXT: vmv4r.v v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv t0, a0 +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, a0, t0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28 +; CHECK-NEXT: vsetvli zero, a7, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v26, v16, v8, v0.t +; CHECK-NEXT: bltu a6, a4, .LBB85_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a6, a4 +; CHECK-NEXT: .LBB85_2: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv a7, a0 +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, a0, a7 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a6, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v6, v16, v8, v0.t +; CHECK-NEXT: add a0, a3, a3 +; CHECK-NEXT: bltu a2, a5, .LBB85_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a2, a5 +; CHECK-NEXT: .LBB85_4: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v6, v26, a3 +; CHECK-NEXT: sub a5, a2, a4 +; CHECK-NEXT: sltu a6, a2, a5 +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: and a5, a6, a5 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: add a6, sp, a6 +; CHECK-NEXT: addi a6, a6, 16 +; CHECK-NEXT: vl1r.v v8, (a6) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v7, v8 +; CHECK-NEXT: vsetvli a6, zero, e8, mf2, ta, ma 
+; CHECK-NEXT: vslidedown.vx v0, v8, a3 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a6 +; CHECK-NEXT: slli a6, a6, 3 +; CHECK-NEXT: add a6, a6, a7 +; CHECK-NEXT: add a6, sp, a6 +; CHECK-NEXT: addi a6, a6, 16 +; CHECK-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli a6, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a6 +; CHECK-NEXT: slli a6, a6, 2 +; CHECK-NEXT: add a6, a6, a7 +; CHECK-NEXT: add a6, sp, a6 +; CHECK-NEXT: addi a6, a6, 16 +; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a6 +; CHECK-NEXT: slli a6, a6, 2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: add a6, a6, a7 +; CHECK-NEXT: add a6, sp, a6 +; CHECK-NEXT: addi a6, a6, 16 +; CHECK-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a6 +; CHECK-NEXT: slli a6, a6, 2 +; CHECK-NEXT: add a6, a6, a7 +; CHECK-NEXT: add a6, sp, a6 +; CHECK-NEXT: addi a6, a6, 16 +; CHECK-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v5, v16, v8, v0.t +; CHECK-NEXT: bltu a2, a4, .LBB85_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: mv a2, a4 +; CHECK-NEXT: .LBB85_6: +; CHECK-NEXT: vsetvli a4, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 1 +; CHECK-NEXT: mv a5, a4 +; CHECK-NEXT: slli a4, a4, 2 +; CHECK-NEXT: add a5, a5, a4 +; CHECK-NEXT: slli a4, a4, 1 +; CHECK-NEXT: add a4, a4, a5 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: 
vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v16, v24, v0.t +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v8, v5, a3 +; CHECK-NEXT: add a0, a1, a1 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vx v8, v6, a1 +; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 64 x i1> @llvm.vp.fcmp.nxv64bf16(<vscale x 64 x bfloat> %va, <vscale x 64 x bfloat> %vb, metadata !"oeq", <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i1> %v +} declare <vscale x 1 x i1> @llvm.vp.fcmp.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, metadata, <vscale x 1 x i1>, i32) @@ -2108,10 +3760,10 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFH-NEXT: and a4, a5, a4 ; ZVFH-NEXT: vsetvli zero, a4, e16, m8, ta, ma ; ZVFH-NEXT: vmfeq.vv v7, v16, v8, v0.t -; ZVFH-NEXT: bltu a2, a3, .LBB85_2 +; ZVFH-NEXT: bltu a2, a3, .LBB171_2 ; ZVFH-NEXT: # %bb.1: ; ZVFH-NEXT: mv a2, a3 -; ZVFH-NEXT: .LBB85_2: +; ZVFH-NEXT: .LBB171_2: ; ZVFH-NEXT: vmv1r.v v0, v24 ; ZVFH-NEXT: csrr a0, vlenb ; ZVFH-NEXT: slli a0, a0, 3 @@ -2137,14 +3789,18 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a3, 34 -; ZVFHMIN-NEXT: mul a1, a1, a3 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: mv a3, a1 +; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: add a1, a1, a3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb ; ZVFHMIN-NEXT: vmv8r.v v24, v16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a3, 18 -; ZVFHMIN-NEXT: mul a1, 
a1, a3 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: mv a3, a1 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: add a1, a1, a3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill @@ -2176,8 +3832,12 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a3 ; ZVFHMIN-NEXT: vl8re16.v v8, (a0) ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li t0, 26 -; ZVFHMIN-NEXT: mul a0, a0, t0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: mv t0, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add t0, t0, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, t0 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill @@ -2190,18 +3850,20 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 ; ZVFHMIN-NEXT: vmv4r.v v16, v24 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li t0, 10 -; ZVFHMIN-NEXT: mul a0, a0, t0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: mv t0, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, a0, t0 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28 ; ZVFHMIN-NEXT: vsetvli zero, a7, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v26, v16, v8, v0.t -; ZVFHMIN-NEXT: bltu a6, a4, .LBB85_2 +; ZVFHMIN-NEXT: bltu a6, a4, .LBB171_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a6, a4 -; ZVFHMIN-NEXT: .LBB85_2: +; ZVFHMIN-NEXT: .LBB171_2: ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 1 ; ZVFHMIN-NEXT: add a0, sp, a0 @@ -2210,8 +3872,10 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li 
a7, 10 -; ZVFHMIN-NEXT: mul a0, a0, a7 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: mv a7, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, a0, a7 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload @@ -2221,10 +3885,10 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFHMIN-NEXT: vsetvli zero, a6, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v6, v16, v8, v0.t ; ZVFHMIN-NEXT: add a0, a3, a3 -; ZVFHMIN-NEXT: bltu a2, a5, .LBB85_4 +; ZVFHMIN-NEXT: bltu a2, a5, .LBB171_4 ; ZVFHMIN-NEXT: # %bb.3: ; ZVFHMIN-NEXT: mv a2, a5 -; ZVFHMIN-NEXT: .LBB85_4: +; ZVFHMIN-NEXT: .LBB171_4: ; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslideup.vx v6, v26, a3 ; ZVFHMIN-NEXT: sub a5, a2, a4 @@ -2239,43 +3903,57 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFHMIN-NEXT: vsetvli a6, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a3 ; ZVFHMIN-NEXT: csrr a6, vlenb -; ZVFHMIN-NEXT: li a7, 18 -; ZVFHMIN-NEXT: mul a6, a6, a7 +; ZVFHMIN-NEXT: slli a6, a6, 1 +; ZVFHMIN-NEXT: mv a7, a6 +; ZVFHMIN-NEXT: slli a6, a6, 3 +; ZVFHMIN-NEXT: add a6, a6, a7 ; ZVFHMIN-NEXT: add a6, sp, a6 ; ZVFHMIN-NEXT: addi a6, a6, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli a6, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 ; ZVFHMIN-NEXT: csrr a6, vlenb -; ZVFHMIN-NEXT: li a7, 10 -; ZVFHMIN-NEXT: mul a6, a6, a7 +; ZVFHMIN-NEXT: slli a6, a6, 1 +; ZVFHMIN-NEXT: mv a7, a6 +; ZVFHMIN-NEXT: slli a6, a6, 2 +; ZVFHMIN-NEXT: add a6, a6, a7 ; ZVFHMIN-NEXT: add a6, sp, a6 ; ZVFHMIN-NEXT: addi a6, a6, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a6, vlenb -; ZVFHMIN-NEXT: li a7, 26 -; ZVFHMIN-NEXT: mul a6, a6, a7 +; ZVFHMIN-NEXT: slli a6, a6, 1 +; ZVFHMIN-NEXT: mv a7, a6 +; ZVFHMIN-NEXT: slli a6, a6, 2 +; ZVFHMIN-NEXT: add a7, a7, a6 +; 
ZVFHMIN-NEXT: slli a6, a6, 1 +; ZVFHMIN-NEXT: add a6, a6, a7 ; ZVFHMIN-NEXT: add a6, sp, a6 ; ZVFHMIN-NEXT: addi a6, a6, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 ; ZVFHMIN-NEXT: csrr a6, vlenb -; ZVFHMIN-NEXT: li a7, 10 -; ZVFHMIN-NEXT: mul a6, a6, a7 +; ZVFHMIN-NEXT: slli a6, a6, 1 +; ZVFHMIN-NEXT: mv a7, a6 +; ZVFHMIN-NEXT: slli a6, a6, 2 +; ZVFHMIN-NEXT: add a6, a6, a7 ; ZVFHMIN-NEXT: add a6, sp, a6 ; ZVFHMIN-NEXT: addi a6, a6, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a5, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v5, v16, v8, v0.t -; ZVFHMIN-NEXT: bltu a2, a4, .LBB85_6 +; ZVFHMIN-NEXT: bltu a2, a4, .LBB171_6 ; ZVFHMIN-NEXT: # %bb.5: ; ZVFHMIN-NEXT: mv a2, a4 -; ZVFHMIN-NEXT: .LBB85_6: +; ZVFHMIN-NEXT: .LBB171_6: ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 26 -; ZVFHMIN-NEXT: mul a4, a4, a5 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 2 +; ZVFHMIN-NEXT: add a5, a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload @@ -2290,8 +3968,10 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFHMIN-NEXT: vslideup.vx v8, v6, a1 ; ZVFHMIN-NEXT: vmv.v.v v0, v8 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 34 -; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret @@ -3377,163 +5057,6 @@ define <vscale x 8 x i1> @fcmp_uno_vf_swap_nxv8f64(<vscale x 8 x double> %va, do declare <vscale x 32 x i1> @llvm.vp.fcmp.nxv32f64(<vscale x 32 x double>, <vscale x 32 x 
double>, metadata, <vscale x 32 x i1>, i32) define <vscale x 32 x i1> @fcmp_oeq_vv_nxv32f64(<vscale x 32 x double> %va, <vscale x 32 x double> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: fcmp_oeq_vv_nxv32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 48 -; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 40 -; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli t0, a4, 3 -; CHECK-NEXT: slli a1, a4, 5 -; CHECK-NEXT: sub t1, a1, t0 -; CHECK-NEXT: srli a1, a4, 2 -; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v7, v0, a1 -; CHECK-NEXT: srli a3, a4, 3 -; CHECK-NEXT: add a5, a2, t0 -; CHECK-NEXT: vl8re64.v v8, (a5) -; CHECK-NEXT: slli t3, a4, 4 -; CHECK-NEXT: slli a5, a4, 1 -; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v0, a3 -; CHECK-NEXT: mv a7, a6 -; CHECK-NEXT: bltu a6, a5, .LBB171_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a7, a5 -; CHECK-NEXT: .LBB171_2: -; CHECK-NEXT: add t2, a0, t0 -; CHECK-NEXT: add t1, a2, t1 -; CHECK-NEXT: add t0, a2, t3 -; CHECK-NEXT: vl8re64.v v16, (a2) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: sub a2, a7, a4 -; CHECK-NEXT: sltu t3, a7, a2 -; CHECK-NEXT: addi t3, 
t3, -1 -; CHECK-NEXT: and a2, t3, a2 -; CHECK-NEXT: csrr t3, vlenb -; CHECK-NEXT: slli t3, t3, 5 -; CHECK-NEXT: add t3, sp, t3 -; CHECK-NEXT: addi t3, t3, 16 -; CHECK-NEXT: vl8r.v v16, (t3) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v6, v16, v8, v0.t -; CHECK-NEXT: bltu a7, a4, .LBB171_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a7, a4 -; CHECK-NEXT: .LBB171_4: -; CHECK-NEXT: vl8re64.v v8, (t2) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 5 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v8, (t1) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li t1, 24 -; CHECK-NEXT: mul a2, a2, t1 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v18, v7, a3 -; CHECK-NEXT: vl8re64.v v8, (t0) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v17, v24, v8, v0.t -; CHECK-NEXT: add a2, a3, a3 -; CHECK-NEXT: sub a0, a6, a5 -; CHECK-NEXT: sltu a5, a6, a0 -; CHECK-NEXT: addi a5, a5, -1 -; CHECK-NEXT: and a0, a5, a0 -; CHECK-NEXT: vsetvli zero, a2, 
e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v17, v6, a3 -; CHECK-NEXT: mv a2, a0 -; CHECK-NEXT: bltu a0, a4, .LBB171_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: mv a2, a4 -; CHECK-NEXT: .LBB171_6: -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 3 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: addi a5, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v24, v8, v0.t -; CHECK-NEXT: sub a2, a0, a4 -; CHECK-NEXT: sltu a0, a0, a2 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: vmv1r.v v0, v18 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 5 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a4, 24 -; CHECK-NEXT: mul a2, a2, a4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v18, v24, v8, v0.t -; CHECK-NEXT: add a0, a1, a3 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v17, v16, a1 -; CHECK-NEXT: slli a0, a3, 1 -; CHECK-NEXT: add a0, a0, a3 -; CHECK-NEXT: add a3, a0, a3 -; CHECK-NEXT: vsetvli zero, a3, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v17, v18, a0 -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 48 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call <vscale x 32 x i1> @llvm.vp.fcmp.nxv32f64(<vscale x 32 x double> %va, <vscale x 32 x double> %vb, metadata !"oeq", <vscale x 32 x i1> %m, i32 %evl) ret <vscale x 32 x i1> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll 
index c2c977bec605..23d73481aed2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll @@ -1,16 +1,1162 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64 -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN64 ; FIXME: The scalar/vector operations ('fv' tests) should swap operands and ; condition codes accordingly in order to generate a 'vf' instruction. 
+define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) { +; CHECK-LABEL: fcmp_oeq_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v16, v12 +; CHECK-NEXT: ret + %vc = fcmp oeq <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_oeq_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp oeq <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_oeq_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_oeq_fv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v16, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp oeq <vscale x 8 x bfloat> %splat, %va + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, 
<vscale x 8 x bfloat> %vb) #0 { +; CHECK-LABEL: fcmp_oeq_vv_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v16, v12 +; CHECK-NEXT: ret + %vc = fcmp oeq <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 { +; CHECK-LABEL: fcmp_oeq_vf_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp oeq <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) { +; CHECK-LABEL: fcmp_ogt_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v16, v12 +; CHECK-NEXT: ret + %vc = fcmp ogt <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_ogt_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, 
e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v16, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp ogt <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ogt_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_ogt_fv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp ogt <vscale x 8 x bfloat> %splat, %va + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 { +; CHECK-LABEL: fcmp_ogt_vv_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v16, v12 +; CHECK-NEXT: ret + %vc = fcmp ogt <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 { +; CHECK-LABEL: fcmp_ogt_vf_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v16, 
v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp ogt <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_oge_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) { +; CHECK-LABEL: fcmp_oge_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v0, v16, v12 +; CHECK-NEXT: ret + %vc = fcmp oge <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_oge_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_oge_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v0, v16, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp oge <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_oge_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_oge_fv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v0, v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat 
%b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp oge <vscale x 8 x bfloat> %splat, %va + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_oge_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 { +; CHECK-LABEL: fcmp_oge_vv_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v0, v16, v12 +; CHECK-NEXT: ret + %vc = fcmp oge <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_oge_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 { +; CHECK-LABEL: fcmp_oge_vf_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v0, v16, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp oge <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_olt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) { +; CHECK-LABEL: fcmp_olt_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v16, v12 +; CHECK-NEXT: ret + %vc = fcmp olt <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_olt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, 
bfloat %b) { +; CHECK-LABEL: fcmp_olt_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp olt <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_olt_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_olt_fv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v16, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp olt <vscale x 8 x bfloat> %splat, %va + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_olt_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 { +; CHECK-LABEL: fcmp_olt_vv_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v16, v12 +; CHECK-NEXT: ret + %vc = fcmp olt <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_olt_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 { +; CHECK-LABEL: fcmp_olt_vf_nxv8bf16_nonans: 
+; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp olt <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ole_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) { +; CHECK-LABEL: fcmp_ole_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v0, v16, v12 +; CHECK-NEXT: ret + %vc = fcmp ole <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ole_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_ole_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v0, v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp ole <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ole_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_ole_fv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, 
e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v0, v16, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp ole <vscale x 8 x bfloat> %splat, %va + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ole_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 { +; CHECK-LABEL: fcmp_ole_vv_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v0, v16, v12 +; CHECK-NEXT: ret + %vc = fcmp ole <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ole_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 { +; CHECK-LABEL: fcmp_ole_vf_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v0, v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp ole <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_one_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) { +; CHECK-LABEL: fcmp_one_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; 
CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v16, v12 +; CHECK-NEXT: vmflt.vv v9, v12, v16 +; CHECK-NEXT: vmor.mm v0, v9, v8 +; CHECK-NEXT: ret + %vc = fcmp one <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_one_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_one_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v12, v16 +; CHECK-NEXT: vmflt.vv v9, v16, v12 +; CHECK-NEXT: vmor.mm v0, v9, v8 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp one <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_one_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_one_fv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v16, v12 +; CHECK-NEXT: vmflt.vv v9, v12, v16 +; CHECK-NEXT: vmor.mm v0, v9, v8 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp one <vscale x 8 x bfloat> %splat, %va + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_one_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 { +; 
CHECK-LABEL: fcmp_one_vv_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v0, v16, v12 +; CHECK-NEXT: ret + %vc = fcmp one <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_one_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 { +; CHECK-LABEL: fcmp_one_vf_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v0, v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp one <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ord_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) { +; CHECK-LABEL: fcmp_ord_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v10, v12, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v12, v12 +; CHECK-NEXT: vmand.mm v0, v8, v10 +; CHECK-NEXT: ret + %vc = fcmp ord <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ord_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_ord_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; 
CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v12, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v9, v12, v12 +; CHECK-NEXT: vmand.mm v0, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp ord <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ord_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_ord_fv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v12, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v9, v12, v12 +; CHECK-NEXT: vmand.mm v0, v9, v8 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp ord <vscale x 8 x bfloat> %splat, %va + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ord_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 { +; CHECK-LABEL: fcmp_ord_vv_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v10, v12, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v 
v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v12, v12 +; CHECK-NEXT: vmand.mm v0, v8, v10 +; CHECK-NEXT: ret + %vc = fcmp ord <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ord_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 { +; CHECK-LABEL: fcmp_ord_vf_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v8, v12, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v9, v12, v12 +; CHECK-NEXT: vmand.mm v0, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp ord <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) { +; CHECK-LABEL: fcmp_ueq_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v16, v12 +; CHECK-NEXT: vmflt.vv v9, v12, v16 +; CHECK-NEXT: vmnor.mm v0, v9, v8 +; CHECK-NEXT: ret + %vc = fcmp ueq <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_ueq_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; 
CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v12, v16 +; CHECK-NEXT: vmflt.vv v9, v16, v12 +; CHECK-NEXT: vmnor.mm v0, v9, v8 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp ueq <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_ueq_fv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v16, v12 +; CHECK-NEXT: vmflt.vv v9, v12, v16 +; CHECK-NEXT: vmnor.mm v0, v9, v8 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp ueq <vscale x 8 x bfloat> %splat, %va + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 { +; CHECK-LABEL: fcmp_ueq_vv_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v16, v12 +; CHECK-NEXT: ret + %vc = fcmp ueq <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 { +; CHECK-LABEL: fcmp_ueq_vf_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, 
zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp ueq <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) { +; CHECK-LABEL: fcmp_ugt_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v8, v16, v12 +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %vc = fcmp ugt <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_ugt_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v8, v12, v16 +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp ugt <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_ugt_fv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; 
CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v8, v16, v12 +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp ugt <vscale x 8 x bfloat> %splat, %va + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 { +; CHECK-LABEL: fcmp_ugt_vv_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v16, v12 +; CHECK-NEXT: ret + %vc = fcmp ugt <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 { +; CHECK-LABEL: fcmp_ugt_vf_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v16, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp ugt <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_uge_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) { +; CHECK-LABEL: fcmp_uge_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, 
v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v16, v12 +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %vc = fcmp uge <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_uge_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_uge_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v12, v16 +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp uge <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_uge_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_uge_fv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v16, v12 +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp uge <vscale x 8 x bfloat> %splat, %va + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_uge_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 { +; CHECK-LABEL: fcmp_uge_vv_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: 
vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v0, v16, v12 +; CHECK-NEXT: ret + %vc = fcmp uge <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_uge_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 { +; CHECK-LABEL: fcmp_uge_vf_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v0, v16, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp uge <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ult_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) { +; CHECK-LABEL: fcmp_ult_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v8, v16, v12 +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %vc = fcmp ult <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ult_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_ult_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v8, v16, v12 +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %head = insertelement 
<vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp ult <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ult_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_ult_fv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v8, v12, v16 +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp ult <vscale x 8 x bfloat> %splat, %va + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ult_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 { +; CHECK-LABEL: fcmp_ult_vv_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v16, v12 +; CHECK-NEXT: ret + %vc = fcmp ult <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ult_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 { +; CHECK-LABEL: fcmp_ult_vf_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x 
bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp ult <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ule_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) { +; CHECK-LABEL: fcmp_ule_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v16, v12 +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %vc = fcmp ule <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ule_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_ule_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v16, v12 +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp ule <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ule_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_ule_fv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmflt.vv v8, v12, v16 +; CHECK-NEXT: vmnot.m v0, v8 +; CHECK-NEXT: ret + %head = insertelement <vscale 
x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp ule <vscale x 8 x bfloat> %splat, %va + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ule_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 { +; CHECK-LABEL: fcmp_ule_vv_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v0, v16, v12 +; CHECK-NEXT: ret + %vc = fcmp ule <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_ule_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 { +; CHECK-LABEL: fcmp_ule_vf_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfle.vv v0, v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp ule <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_une_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) { +; CHECK-LABEL: fcmp_une_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v0, v16, v12 +; CHECK-NEXT: ret + %vc = fcmp une <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> 
@fcmp_une_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_une_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v0, v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp une <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_une_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_une_fv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v0, v16, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp une <vscale x 8 x bfloat> %splat, %va + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_une_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 { +; CHECK-LABEL: fcmp_une_vv_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v0, v16, v12 +; CHECK-NEXT: ret + %vc = fcmp une <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_une_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) 
#0 { +; CHECK-LABEL: fcmp_une_vf_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v0, v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp une <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_uno_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) { +; CHECK-LABEL: fcmp_uno_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v10, v12, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v8, v12, v12 +; CHECK-NEXT: vmor.mm v0, v8, v10 +; CHECK-NEXT: ret + %vc = fcmp uno <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_uno_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_uno_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v8, v12, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v9, v12, v12 +; CHECK-NEXT: vmor.mm v0, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = 
shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp uno <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_uno_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: fcmp_uno_fv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v8, v12, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v9, v12, v12 +; CHECK-NEXT: vmor.mm v0, v9, v8 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp uno <vscale x 8 x bfloat> %splat, %va + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_uno_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 { +; CHECK-LABEL: fcmp_uno_vv_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v10, v12, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v8, v12, v12 +; CHECK-NEXT: vmor.mm v0, v8, v10 +; CHECK-NEXT: ret + %vc = fcmp uno <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x i1> %vc +} + +define <vscale x 8 x i1> @fcmp_uno_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 { +; CHECK-LABEL: fcmp_uno_vf_nxv8bf16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; 
CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v8, v12, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmfne.vv v9, v12, v12 +; CHECK-NEXT: vmor.mm v0, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fcmp uno <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x i1> %vc +} + define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb) { ; ZVFH-LABEL: fcmp_oeq_vv_nxv8f16: ; ZVFH: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll index af80e627b43f..53be153f8ff2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll @@ -1,12 +1,239 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: 
--check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN + +define <vscale x 1 x bfloat> @vfadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfadd_vv_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfadd.vv v9, v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 1 x bfloat> %vc +} + +define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfadd_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfadd.vv v9, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 1 x bfloat> %head, 
<vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %vc = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 1 x bfloat> %vc +} + +define <vscale x 2 x bfloat> @vfadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfadd_vv_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfadd.vv v9, v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 2 x bfloat> %vc +} + +define <vscale x 2 x bfloat> @vfadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfadd_vf_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfadd.vv v9, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer + %vc = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 2 x bfloat> %vc +} + +define 
<vscale x 4 x bfloat> @vfadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfadd_vv_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfadd.vv v10, v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 4 x bfloat> %vc +} + +define <vscale x 4 x bfloat> @vfadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfadd_vf_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfadd.vv v10, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %vc = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 4 x bfloat> %vc +} + +define <vscale x 8 x bfloat> @vfadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfadd_vv_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfadd.vv v12, v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfadd_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfadd.vv v12, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 16 x bfloat> @vfadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfadd_vv_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v16, v24, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 16 x bfloat> 
@llvm.experimental.constrained.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 16 x bfloat> %vc +} + +define <vscale x 16 x bfloat> @vfadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfadd_vf_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %vc = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 16 x bfloat> %vc +} + +define <vscale x 32 x bfloat> @vfadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfadd_vv_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v24, v0, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 32 x bfloat> 
@llvm.experimental.constrained.fadd.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 32 x bfloat> %vc +} + +define <vscale x 32 x bfloat> @vfadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfadd_vf_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v24, v24, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %vc = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fadd.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 32 x bfloat> %vc +} declare <vscale x 1 x half> @llvm.experimental.constrained.fadd.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, metadata, metadata) define <vscale x 1 x half> @vfadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) strictfp { diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll index 8f21e326e687..c3c0958f7096 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll @@ -1,12 +1,252 @@ ; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN + +define <vscale x 1 x bfloat> @vfadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb) { +; CHECK-LABEL: vfadd_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfadd.vv v9, v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %vc = fadd <vscale x 1 x bfloat> %va, %vb + ret <vscale x 1 x bfloat> %vc 
+} + +define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfadd_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfadd.vv v9, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %vc = fadd <vscale x 1 x bfloat> %va, %splat + ret <vscale x 1 x bfloat> %vc +} + +define <vscale x 2 x bfloat> @vfadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb) { +; CHECK-LABEL: vfadd_vv_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfadd.vv v9, v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %vc = fadd <vscale x 2 x bfloat> %va, %vb + ret <vscale x 2 x bfloat> %vc +} + +define <vscale x 2 x bfloat> @vfadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfadd_vf_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfadd.vv v9, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0 + %splat = 
shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer + %vc = fadd <vscale x 2 x bfloat> %va, %splat + ret <vscale x 2 x bfloat> %vc +} + +define <vscale x 4 x bfloat> @vfadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb) { +; CHECK-LABEL: vfadd_vv_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfadd.vv v10, v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %vc = fadd <vscale x 4 x bfloat> %va, %vb + ret <vscale x 4 x bfloat> %vc +} + +define <vscale x 4 x bfloat> @vfadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfadd_vf_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfadd.vv v10, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %vc = fadd <vscale x 4 x bfloat> %va, %splat + ret <vscale x 4 x bfloat> %vc +} + +define <vscale x 8 x bfloat> @vfadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) { +; CHECK-LABEL: vfadd_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfadd.vv v12, v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; 
CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %vc = fadd <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfadd_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfadd.vv v12, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fadd <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 8 x bfloat> @vfadd_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfadd_fv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfadd.vv v12, v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fadd <vscale x 8 x bfloat> %splat, %va + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 16 x bfloat> @vfadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb) { +; CHECK-LABEL: vfadd_vv_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v 
v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v16, v24, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %vc = fadd <vscale x 16 x bfloat> %va, %vb + ret <vscale x 16 x bfloat> %vc +} + +define <vscale x 16 x bfloat> @vfadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfadd_vf_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %vc = fadd <vscale x 16 x bfloat> %va, %splat + ret <vscale x 16 x bfloat> %vc +} + +define <vscale x 32 x bfloat> @vfadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) { +; CHECK-LABEL: vfadd_vv_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v24, v0, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: ret + %vc = fadd <vscale x 32 x bfloat> %va, %vb + ret <vscale x 32 x bfloat> %vc +} + +define 
<vscale x 32 x bfloat> @vfadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfadd_vf_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v24, v24, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v20 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v16, v24, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %vc = fadd <vscale x 32 x bfloat> %va, %splat + ret <vscale x 32 x bfloat> %vc +} define <vscale x 1 x half> @vfadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) { ; ZVFH-LABEL: vfadd_vv_nxv1f16: diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll index 395f1a7c382b..b3de904d2062 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll @@ -1,13 +1,660 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck 
%s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN + +declare <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x bfloat> @vfadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfadd.vv v9, v9, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfadd_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, i32 zeroext %evl) { 
+; CHECK-LABEL: vfadd_vv_nxv1bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfadd.vv v9, v9, v10 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfadd.vv v9, v10, v8, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16_commute(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vf_nxv1bf16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfadd.vv v9, v8, v10, 
v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} +define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, bfloat %b, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vf_nxv1bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfadd.vv v9, v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16_unmasked_commute(<vscale x 1 x bfloat> %va, bfloat %b, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vf_nxv1bf16_unmasked_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfadd.vv v9, v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; 
CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +declare <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x bfloat> @vfadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vv_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfadd.vv v9, v9, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfadd_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vv_nxv2bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfadd.vv v9, v9, v10 +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfadd_vf_nxv2bf16(<vscale x 
2 x bfloat> %va, bfloat %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vf_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfadd.vv v9, v10, v8, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfadd_vf_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, bfloat %b, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vf_nxv2bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfadd.vv v9, v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +declare <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x bfloat> 
@vfadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vv_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfadd.vv v10, v12, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfadd_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vv_nxv4bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfadd.vv v10, v12, v10 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vf_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfadd.vv v10, v10, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 
x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfadd_vf_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, bfloat %b, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vf_nxv4bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfadd.vv v10, v10, v12 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +declare <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x bfloat> @vfadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfadd.vv v12, v16, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> 
@llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfadd_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vv_nxv8bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfadd.vv v12, v16, v12 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfadd.vv v12, v12, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, bfloat %b, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vf_nxv8bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: 
fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfadd.vv v12, v12, v16 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +declare <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x bfloat> @vfadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vv_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfadd_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vv_nxv16bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; 
CHECK-NEXT: vfadd.vv v16, v24, v16 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vf_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v16, v16, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfadd_vf_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, bfloat %b, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vf_nxv16bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v16, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 16 x bfloat> 
%elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +declare <vscale x 32 x bfloat> @llvm.vp.fadd.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x bfloat> @vfadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vv_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v16, v16, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: bltu a0, a1, .LBB22_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB22_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, a0, e32, 
m8, ta, ma +; CHECK-NEXT: vfadd.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.fadd.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfadd_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vv_nxv32bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; CHECK-NEXT: vmset.m v24 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v24, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v16, v16, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: bltu a0, a1, .LBB23_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB23_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; 
CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v16, v24, v16 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.fadd.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vf_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; CHECK-NEXT: vmv8r.v v24, v8 +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 3 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: addi a2, sp, 16 +; 
CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a4, a2, 3 +; CHECK-NEXT: add a2, a4, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v16, v8, v16, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: bltu a0, a1, .LBB24_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB24_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v16, v16, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x bfloat> @llvm.vp.fadd.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, 
i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfadd_vf_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, bfloat %b, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vf_nxv32bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vmset.m v24 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v24, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vmv4r.v v16, v8 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: bltu a0, a1, .LBB25_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: 
vl8r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfadd.vv v16, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x bfloat> @llvm.vp.fadd.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} declare <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32) define <vscale x 1 x half> @vfadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { @@ -564,10 +1211,10 @@ define <vscale x 32 x half> @vfadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB48_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB22_2: +; ZVFHMIN-NEXT: .LBB48_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 @@ -621,10 +1268,10 @@ define <vscale x 32 x half> @vfadd_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB49_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB23_2: +; 
ZVFHMIN-NEXT: .LBB49_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 @@ -699,10 +1346,10 @@ define <vscale x 32 x half> @vfadd_vf_nxv32f16(<vscale x 32 x half> %va, half %b ; ZVFHMIN-NEXT: vfadd.vv v16, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB24_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB50_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB24_2: +; ZVFHMIN-NEXT: .LBB50_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 @@ -780,10 +1427,10 @@ define <vscale x 32 x half> @vfadd_vf_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: vfadd.vv v16, v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB25_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB51_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB25_2: +; ZVFHMIN-NEXT: .LBB51_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfclass-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfclass-sdnode.ll index f2af8ac3b02d..c97278480f1a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfclass-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfclass-sdnode.ll @@ -1,18 +1,51 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ 
+; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN -define <vscale x 2 x i1> @isnan_nxv2f16(<vscale x 2 x half> %x) { -; CHECK-LABEL: isnan_nxv2f16: +define <vscale x 2 x i1> @isnan_nxv2bf16(<vscale x 2 x bfloat> %x) { +; CHECK-LABEL: isnan_nxv2bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfclass.v v8, v8 -; CHECK-NEXT: li a0, 768 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: addi a0, a0, -128 +; CHECK-NEXT: vmsgt.vx v0, v8, a0 ; CHECK-NEXT: ret + %1 = call <vscale x 2 x i1> @llvm.is.fpclass.nxv2bf16(<vscale x 2 x bfloat> %x, i32 3) ; nan + ret <vscale x 2 x i1> %1 +} + +define <vscale x 2 x i1> @isnan_nxv2f16(<vscale x 2 x half> %x) { +; ZVFH-LABEL: isnan_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfclass.v v8, v8 +; ZVFH-NEXT: li a0, 768 +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: vmsne.vi v0, v8, 0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: isnan_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: li a0, 31 +; ZVFHMIN-NEXT: slli a0, a0, 10 +; ZVFHMIN-NEXT: vmsgt.vx v0, v8, a0 +; ZVFHMIN-NEXT: ret %1 = call <vscale x 2 x i1> 
@llvm.is.fpclass.nxv2f16(<vscale x 2 x half> %x, i32 3) ; nan ret <vscale x 2 x i1> %1 } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll index 69095a0b21bb..aa59732e1e1e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll @@ -1,12 +1,258 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN + +define <vscale x 1 x bfloat> @vfdiv_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfdiv_vv_nxv1bf16: +; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfdiv.vv v9, v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fdiv.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 1 x bfloat> %vc +} + +define <vscale x 1 x bfloat> @vfdiv_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfdiv_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfdiv.vv v9, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %vc = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fdiv.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 1 x bfloat> %vc +} + +define <vscale x 2 x bfloat> @vfdiv_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfdiv_vv_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfdiv.vv v9, v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; 
CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fdiv.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 2 x bfloat> %vc +} + +define <vscale x 2 x bfloat> @vfdiv_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfdiv_vf_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfdiv.vv v9, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer + %vc = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fdiv.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 2 x bfloat> %vc +} + +define <vscale x 4 x bfloat> @vfdiv_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfdiv_vv_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfdiv.vv v10, v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fdiv.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 4 x bfloat> %vc +} + +define 
<vscale x 4 x bfloat> @vfdiv_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfdiv_vf_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfdiv.vv v10, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %vc = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fdiv.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 4 x bfloat> %vc +} + +define <vscale x 8 x bfloat> @vfdiv_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfdiv_vv_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfdiv.vv v12, v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fdiv.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 8 x bfloat> @vfdiv_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfdiv_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; 
CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfdiv.vv v12, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fdiv.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 8 x bfloat> @vfdiv_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfdiv_fv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfdiv.vv v12, v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fdiv.nxv8bf16(<vscale x 8 x bfloat> %splat, <vscale x 8 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 16 x bfloat> @vfdiv_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfdiv_vv_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: 
vfdiv.vv v16, v24, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fdiv.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 16 x bfloat> %vc +} + +define <vscale x 16 x bfloat> @vfdiv_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfdiv_vf_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %vc = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fdiv.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 16 x bfloat> %vc +} + +define <vscale x 32 x bfloat> @vfdiv_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfdiv_vv_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv v24, v0, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv 
v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fdiv.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 32 x bfloat> %vc +} + +define <vscale x 32 x bfloat> @vfdiv_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfdiv_vf_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv v24, v24, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %vc = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fdiv.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 32 x bfloat> %vc +} declare <vscale x 1 x half> @llvm.experimental.constrained.fdiv.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, metadata, metadata) define <vscale x 1 x half> @vfdiv_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) strictfp { diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll index 
9f5434dd3472..f7db2be35d72 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll @@ -1,13 +1,249 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +define <vscale x 1 x bfloat> @vfdiv_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb) { +; CHECK-LABEL: vfdiv_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfdiv.vv v9, v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %vc = fdiv <vscale x 1 x bfloat> %va, %vb + ret <vscale x 1 x bfloat> %vc +} + +define <vscale x 1 x bfloat> @vfdiv_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfdiv_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli 
a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfdiv.vv v9, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %vc = fdiv <vscale x 1 x bfloat> %va, %splat + ret <vscale x 1 x bfloat> %vc +} + +define <vscale x 2 x bfloat> @vfdiv_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb) { +; CHECK-LABEL: vfdiv_vv_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfdiv.vv v9, v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %vc = fdiv <vscale x 2 x bfloat> %va, %vb + ret <vscale x 2 x bfloat> %vc +} + +define <vscale x 2 x bfloat> @vfdiv_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfdiv_vf_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfdiv.vv v9, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer + %vc = fdiv <vscale x 2 x bfloat> %va, %splat + ret <vscale x 2 x bfloat> %vc +} + +define <vscale x 4 x 
bfloat> @vfdiv_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb) { +; CHECK-LABEL: vfdiv_vv_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfdiv.vv v10, v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %vc = fdiv <vscale x 4 x bfloat> %va, %vb + ret <vscale x 4 x bfloat> %vc +} + +define <vscale x 4 x bfloat> @vfdiv_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfdiv_vf_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfdiv.vv v10, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %vc = fdiv <vscale x 4 x bfloat> %va, %splat + ret <vscale x 4 x bfloat> %vc +} + +define <vscale x 8 x bfloat> @vfdiv_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) { +; CHECK-LABEL: vfdiv_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfdiv.vv v12, v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %vc = fdiv <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 8 x bfloat> @vfdiv_vf_nxv8bf16(<vscale x 8 x bfloat> %va, 
bfloat %b) { +; CHECK-LABEL: vfdiv_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfdiv.vv v12, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fdiv <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 8 x bfloat> @vfdiv_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfdiv_fv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfdiv.vv v12, v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fdiv <vscale x 8 x bfloat> %splat, %va + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 16 x bfloat> @vfdiv_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb) { +; CHECK-LABEL: vfdiv_vv_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv v16, v24, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: 
vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %vc = fdiv <vscale x 16 x bfloat> %va, %vb + ret <vscale x 16 x bfloat> %vc +} + +define <vscale x 16 x bfloat> @vfdiv_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfdiv_vf_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %vc = fdiv <vscale x 16 x bfloat> %va, %splat + ret <vscale x 16 x bfloat> %vc +} + +define <vscale x 32 x bfloat> @vfdiv_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) { +; CHECK-LABEL: vfdiv_vv_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv v24, v0, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: ret + %vc = fdiv <vscale x 32 x bfloat> %va, %vb + ret <vscale x 32 x bfloat> %vc +} + +define <vscale x 32 x bfloat> @vfdiv_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfdiv_vf_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, 
m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv v24, v24, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v20 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv v16, v24, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %vc = fdiv <vscale x 32 x bfloat> %va, %splat + ret <vscale x 32 x bfloat> %vc +} + define <vscale x 1 x half> @vfdiv_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) { ; ZVFH-LABEL: vfdiv_vv_nxv1f16: ; ZVFH: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll index 52e2a9535ef6..aa39fe5b5ec8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll @@ -1,13 +1,622 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s 
--check-prefixes=CHECK,ZVFHMIN +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN + +declare <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x bfloat> @vfdiv_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfdiv.vv v9, v9, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfdiv_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vv_nxv1bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; 
CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfdiv.vv v9, v9, v10 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfdiv_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfdiv.vv v9, v10, v8, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfdiv_vf_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, bfloat %b, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vf_nxv1bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfdiv.vv v9, v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, 
i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +declare <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x bfloat> @vfdiv_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vv_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfdiv.vv v9, v9, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfdiv_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vv_nxv2bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfdiv.vv v9, v9, v10 +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfdiv_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: 
vfdiv_vf_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfdiv.vv v9, v10, v8, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfdiv_vf_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, bfloat %b, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vf_nxv2bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfdiv.vv v9, v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +declare <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x bfloat> @vfdiv_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> %m, 
i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vv_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfdiv.vv v10, v12, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfdiv_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vv_nxv4bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfdiv.vv v10, v12, v10 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfdiv_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vf_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfdiv.vv v10, v10, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 4 x bfloat> %elt.head, 
<vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfdiv_vf_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, bfloat %b, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vf_nxv4bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfdiv.vv v10, v10, v12 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +declare <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32) +define <vscale x 8 x bfloat> @vfdiv_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfdiv.vv v12, v16, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> %m, i32 %evl) + ret 
<vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfdiv_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vv_nxv8bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfdiv.vv v12, v16, v12 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfdiv_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfdiv.vv v12, v12, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfdiv_vf_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, bfloat %b, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vf_nxv8bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: 
vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfdiv.vv v12, v12, v16 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +declare <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x bfloat> @vfdiv_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vv_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfdiv_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vv_nxv16bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv v16, v24, v16 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, 
v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfdiv_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vf_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv v16, v16, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfdiv_vf_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, bfloat %b, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vf_nxv16bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv v16, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x bfloat> 
@llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +declare <vscale x 32 x bfloat> @llvm.vp.fdiv.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x bfloat> @vfdiv_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vv_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv v16, v16, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: bltu a0, a1, .LBB20_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB20_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; 
CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.fdiv.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfdiv_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vv_nxv32bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; CHECK-NEXT: vmset.m v24 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v24, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv v16, v16, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: bltu a0, a1, .LBB21_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB21_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv 
v16, v24, v16 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.fdiv.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfdiv_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vf_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; CHECK-NEXT: vmv8r.v v24, v8 +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 3 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: 
vfwcvtbf16.f.f.v v8, v28 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a4, a2, 3 +; CHECK-NEXT: add a2, a4, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv v16, v8, v16, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: bltu a0, a1, .LBB22_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB22_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv v16, v16, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x bfloat> @llvm.vp.fdiv.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfdiv_vf_nxv32bf16_unmasked(<vscale x 32 
x bfloat> %va, bfloat %b, i32 zeroext %evl) { +; CHECK-LABEL: vfdiv_vf_nxv32bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vmset.m v24 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v24, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vmv4r.v v16, v8 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfdiv.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: bltu a0, a1, .LBB23_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB23_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, 
m8, ta, ma +; CHECK-NEXT: vfdiv.vv v16, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x bfloat> @llvm.vp.fdiv.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} declare <vscale x 1 x half> @llvm.vp.fdiv.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32) define <vscale x 1 x half> @vfdiv_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { @@ -514,10 +1123,10 @@ define <vscale x 32 x half> @vfdiv_vv_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB20_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB44_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB20_2: +; ZVFHMIN-NEXT: .LBB44_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 @@ -571,10 +1180,10 @@ define <vscale x 32 x half> @vfdiv_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB21_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB45_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB21_2: +; ZVFHMIN-NEXT: .LBB45_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 @@ -649,10 +1258,10 @@ define <vscale x 32 x half> @vfdiv_vf_nxv32f16(<vscale x 32 x half> %va, half %b ; ZVFHMIN-NEXT: vfdiv.vv v16, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB46_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB22_2: +; ZVFHMIN-NEXT: .LBB46_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 @@ -730,10 +1339,10 @@ define <vscale x 32 x half> @vfdiv_vf_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: vfdiv.vv v16, v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB47_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB23_2: +; ZVFHMIN-NEXT: .LBB47_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll index b6bb0371121b..baecb7bb7d24 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll @@ -1,15 +1,1429 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: 
-target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ ; RUN: --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ ; RUN: --check-prefixes=CHECK,ZVFHMIN +declare <vscale x 1 x bfloat> @llvm.vp.fma.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x bfloat> @vfma_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x bfloat> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfmadd.vv v12, v10, v11, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.fma.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x bfloat> %c, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfma_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x bfloat> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vv_nxv1bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, 
ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfmadd.vv v12, v10, v11 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.fma.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x bfloat> %c, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfma_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x bfloat> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfmadd.vv v12, v9, v11, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x bfloat> @llvm.vp.fma.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfma_vf_nxv1bf16_commute(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x bfloat> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv1bf16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; 
CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfmadd.vv v9, v8, v11, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x bfloat> @llvm.vp.fma.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vc, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfma_vf_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x bfloat> %vc, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv1bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfmadd.vv v12, v9, v11 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x bfloat> @llvm.vp.fma.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfma_vf_nxv1bf16_unmasked_commute(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x bfloat> %vc, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv1bf16_unmasked_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: 
vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfmadd.vv v12, v9, v11 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x bfloat> @llvm.vp.fma.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vc, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +declare <vscale x 2 x bfloat> @llvm.vp.fma.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x bfloat> @vfma_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x bfloat> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vv_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfmadd.vv v12, v10, v11, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.fma.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x bfloat> %c, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfma_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x bfloat> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vv_nxv2bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, 
v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfmadd.vv v12, v10, v11 +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.fma.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x bfloat> %c, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfma_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b, <vscale x 2 x bfloat> %vc, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfmadd.vv v12, v9, v11, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x bfloat> @llvm.vp.fma.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %vc, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfma_vf_nxv2bf16_commute(<vscale x 2 x bfloat> %va, bfloat %b, <vscale x 2 x bfloat> %vc, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv2bf16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 +; 
CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfmadd.vv v9, v8, v11, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x bfloat> @llvm.vp.fma.nxv2bf16(<vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vc, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfma_vf_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, bfloat %b, <vscale x 2 x bfloat> %vc, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv2bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfmadd.vv v12, v9, v11 +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x bfloat> @llvm.vp.fma.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %vc, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfma_vf_nxv2bf16_unmasked_commute(<vscale x 2 x bfloat> %va, bfloat %b, <vscale x 2 x bfloat> %vc, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv2bf16_unmasked_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: 
vfwcvtbf16.f.f.v v11, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfmadd.vv v12, v9, v11 +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x bfloat> @llvm.vp.fma.nxv2bf16(<vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vc, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +declare <vscale x 4 x bfloat> @llvm.vp.fma.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x bfloat> @vfma_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vv_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmadd.vv v14, v10, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v14 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.fma.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfma_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vv_nxv4bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v 
v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmadd.vv v14, v10, v12 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v14 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.fma.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfma_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b, <vscale x 4 x bfloat> %vc, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmadd.vv v16, v14, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x bfloat> @llvm.vp.fma.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %vc, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfma_vf_nxv4bf16_commute(<vscale x 4 x bfloat> %va, bfloat %b, <vscale x 4 x bfloat> %vc, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv4bf16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, 
ta, ma +; CHECK-NEXT: vfmadd.vv v14, v8, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v14 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x bfloat> @llvm.vp.fma.nxv4bf16(<vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vc, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfma_vf_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, bfloat %b, <vscale x 4 x bfloat> %vc, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv4bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmadd.vv v16, v14, v12 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x bfloat> @llvm.vp.fma.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %vc, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfma_vf_nxv4bf16_unmasked_commute(<vscale x 4 x bfloat> %va, bfloat %b, <vscale x 4 x bfloat> %vc, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv4bf16_unmasked_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: 
vfwcvtbf16.f.f.v v14, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmadd.vv v16, v14, v12 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x bfloat> @llvm.vp.fma.nxv4bf16(<vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vc, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +declare <vscale x 8 x bfloat> @llvm.vp.fma.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x bfloat> @vfma_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfmadd.vv v20, v12, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v20 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.fma.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfma_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vv_nxv8bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: 
vfwcvtbf16.f.f.v v20, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfmadd.vv v20, v12, v16 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v20 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.fma.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfma_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x bfloat> %vc, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v12, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfmadd.vv v24, v20, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x bfloat> @llvm.vp.fma.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfma_vf_nxv8bf16_commute(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x bfloat> %vc, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv8bf16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v12, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; 
CHECK-NEXT: vfmadd.vv v20, v8, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v20 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x bfloat> @llvm.vp.fma.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vc, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfma_vf_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x bfloat> %vc, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv8bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v12, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfmadd.vv v24, v20, v16 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x bfloat> @llvm.vp.fma.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfma_vf_nxv8bf16_unmasked_commute(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x bfloat> %vc, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv8bf16_unmasked_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v12, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v 
v20, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfmadd.vv v24, v20, v16 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x bfloat> @llvm.vp.fma.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vc, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +declare <vscale x 16 x bfloat> @llvm.vp.fma.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x bfloat> @vfma_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x bfloat> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vv_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call 
<vscale x 16 x bfloat> @llvm.vp.fma.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x bfloat> %c, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfma_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x bfloat> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vv_nxv16bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v0, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.fma.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x bfloat> %c, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfma_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b, <vscale x 16 x bfloat> %vc, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v4, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v4 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t +; 
CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x bfloat> @llvm.vp.fma.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x bfloat> %vc, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfma_vf_nxv16bf16_commute(<vscale x 16 x bfloat> %va, bfloat %b, <vscale x 16 x bfloat> %vc, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv16bf16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v4, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v4 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x bfloat> @llvm.vp.fma.nxv16bf16(<vscale x 16 x bfloat> %vb, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vc, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfma_vf_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, bfloat %b, <vscale x 16 x bfloat> %vc, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv16bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; 
CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v16, a1 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 +; CHECK-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v16, v0, v24 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x bfloat> @llvm.vp.fma.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x bfloat> %vc, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfma_vf_nxv16bf16_unmasked_commute(<vscale x 16 x bfloat> %va, bfloat %b, <vscale x 16 x bfloat> %vc, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv16bf16_unmasked_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v16, a1 +; CHECK-NEXT: 
addi a1, sp, 16 +; CHECK-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 +; CHECK-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v16, v0, v24 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x bfloat> @llvm.vp.fma.nxv16bf16(<vscale x 16 x bfloat> %vb, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vc, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +declare <vscale x 32 x bfloat> @llvm.vp.fma.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x bfloat> @vfma_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x bfloat> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vv_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a3, a3, a2 +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vl8re16.v v0, (a0) +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a0, a2, 1 +; CHECK-NEXT: sub a3, a1, a0 +; CHECK-NEXT: sltu 
a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 4 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vs1r.v v24, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v24, v24, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a4, a4, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: vmv4r.v v24, v8 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a4, a2, 5 +; CHECK-NEXT: add a2, a4, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a4, a2, 4 +; CHECK-NEXT: add a2, a4, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v4 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl1r.v v0, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, 
m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: bltu a1, a0, .LBB30_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: .LBB30_2: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a2, a2, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a2, a0, 5 +; CHECK-NEXT: add a0, a2, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a2, a0, 4 +; CHECK-NEXT: add a0, a2, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a2, a0, 5 +; CHECK-NEXT: add a0, a2, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a2, a0, 5 +; CHECK-NEXT: add a0, a2, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 
+; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.fma.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x bfloat> %c, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfma_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x bfloat> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vv_nxv32bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 5 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: vmv8r.v v24, v16 +; CHECK-NEXT: vl8re16.v v16, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a0, a2, 1 +; CHECK-NEXT: sub a3, a1, a0 +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; CHECK-NEXT: vmset.m 
v7 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v7, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: bltu a1, a0, .LBB31_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: .LBB31_2: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: mv a2, a0 +; 
CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vfmacc.vv v0, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.fma.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x bfloat> %c, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b, <vscale x 32 x bfloat> %vc, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a2, a2, a1 +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v24, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 5 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and 
a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 4 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a4, a4, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a4, a2, 4 +; CHECK-NEXT: add a2, a4, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a4, a2, 5 +; CHECK-NEXT: add a2, a4, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v20, v8 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: mv a2, 
a1 +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a2, a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a2, a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 5 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a2, a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v8, 
v16, v24, v0.t +; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x bfloat> @llvm.vp.fma.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %vc, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_commute(<vscale x 32 x bfloat> %va, bfloat %b, <vscale x 32 x bfloat> %vc, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv32bf16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a2, a2, a1 +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v24, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 5 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub 
a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 4 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a4, a4, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a4, a2, 4 +; CHECK-NEXT: add a2, a4, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a4, a2, 5 +; CHECK-NEXT: add a2, a4, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-NEXT: # %bb.1: +; 
CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a2, a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 5 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a2, a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a2, a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # 
Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t +; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x bfloat> @llvm.vp.fma.nxv32bf16(<vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vc, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, bfloat %b, <vscale x 32 x bfloat> %vc, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv32bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v24, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, 
a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vmset.m v7 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v7, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v20, v8 +; CHECK-NEXT: bltu a0, a1, .LBB34_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB34_2: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size 
Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v0, v24, v8 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v16, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x bfloat> @llvm.vp.fma.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %vc, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_unmasked_commute(<vscale x 32 x bfloat> %va, bfloat %b, <vscale x 32 x bfloat> %vc, i32 zeroext %evl) { +; CHECK-LABEL: vfma_vf_nxv32bf16_unmasked_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v24, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 
+; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vmset.m v7 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v7, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: bltu a0, a1, .LBB35_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB35_2: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v 
v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v0, v24, v16 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x bfloat> @llvm.vp.fma.nxv32bf16(<vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vc, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} + declare <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32) define <vscale x 1 x half> @vfma_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x half> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { @@ -833,8 +2247,12 @@ define <vscale x 32 x half> @vfma_vv_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a3, 42 -; ZVFHMIN-NEXT: mul a2, a2, a3 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; 
ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 2 +; ZVFHMIN-NEXT: add a3, a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 2 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: sub sp, sp, a2 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb ; ZVFHMIN-NEXT: vmv1r.v v24, v0 @@ -856,8 +2274,11 @@ define <vscale x 32 x half> @vfma_vv_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 25 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a4, a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill @@ -897,13 +2318,16 @@ define <vscale x 32 x half> @vfma_vv_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a1, a0, .LBB30_2 +; ZVFHMIN-NEXT: bltu a1, a0, .LBB66_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: .LBB30_2: +; ZVFHMIN-NEXT: .LBB66_2: ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a2, 25 -; ZVFHMIN-NEXT: mul a0, a0, a2 +; ZVFHMIN-NEXT: mv a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a2, a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a2 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -957,8 +2381,12 @@ define <vscale x 32 x half> @vfma_vv_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 42 -; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: 
slli a0, a0, 1 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a1, a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret @@ -985,8 +2413,10 @@ define <vscale x 32 x half> @vfma_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, ; ZVFHMIN-NEXT: vmv8r.v v24, v16 ; ZVFHMIN-NEXT: vl8re16.v v16, (a0) ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a0, a0, a2 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a2 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill @@ -1017,8 +2447,10 @@ define <vscale x 32 x half> @vfma_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, ; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload @@ -1029,10 +2461,10 @@ define <vscale x 32 x half> @vfma_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, ; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a1, a0, .LBB31_2 +; ZVFHMIN-NEXT: bltu a1, a0, .LBB67_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: .LBB31_2: +; ZVFHMIN-NEXT: .LBB67_2: ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 @@ -1048,8 +2480,10 @@ define <vscale x 32 x half> @vfma_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, ; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a0, a0, a2 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a2 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -1081,8 +2515,12 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16(<vscale x 32 x half> %va, half %b, ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 42 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: add a2, a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 @@ -1109,8 +2547,11 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16(<vscale x 32 x half> %va, half %b, ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 25 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a4, a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill @@ -1146,13 +2587,16 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16(<vscale x 32 x half> %va, half %b, ; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8 ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a0, a1, .LBB32_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB68_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB32_2: +; ZVFHMIN-NEXT: .LBB68_2: ; 
ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 25 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: add a2, a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload @@ -1170,8 +2614,11 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16(<vscale x 32 x half> %va, half %b, ; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 25 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: add a2, a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -1193,8 +2640,11 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16(<vscale x 32 x half> %va, half %b, ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 25 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: add a2, a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload @@ -1206,8 +2656,12 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16(<vscale x 32 x half> %va, half %b, ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 42 -; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a1, a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add sp, 
sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret @@ -1229,8 +2683,12 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_commute(<vscale x 32 x half> %va, ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 42 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: add a2, a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 @@ -1257,8 +2715,11 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_commute(<vscale x 32 x half> %va, ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 25 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a4, a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill @@ -1294,13 +2755,16 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_commute(<vscale x 32 x half> %va, ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a0, a1, .LBB33_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB69_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB33_2: +; ZVFHMIN-NEXT: .LBB69_2: ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 25 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: add a2, a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 
16 ; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload @@ -1325,8 +2789,11 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_commute(<vscale x 32 x half> %va, ; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 25 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: add a2, a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -1341,8 +2808,11 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_commute(<vscale x 32 x half> %va, ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 25 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: add a2, a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload @@ -1354,8 +2824,12 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_commute(<vscale x 32 x half> %va, ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 42 -; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a1, a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret @@ -1384,8 +2858,10 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked(<vscale x 32 x half> %va, ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; 
ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill @@ -1415,8 +2891,10 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked(<vscale x 32 x half> %va, ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload @@ -1427,10 +2905,10 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked(<vscale x 32 x half> %va, ; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB34_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB70_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB34_2: +; ZVFHMIN-NEXT: .LBB70_2: ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 @@ -1446,8 +2924,10 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked(<vscale x 32 x half> %va, ; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload @@ -1489,8 +2969,10 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked_commute(<vscale x 32 x ha ; ZVFHMIN-NEXT: vsetvli a2, 
zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill @@ -1520,8 +3002,10 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked_commute(<vscale x 32 x ha ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload @@ -1532,10 +3016,10 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked_commute(<vscale x 32 x ha ; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB35_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB71_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB35_2: +; ZVFHMIN-NEXT: .LBB71_2: ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 @@ -1551,8 +3035,10 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked_commute(<vscale x 32 x ha ; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload @@ -2250,14 
+3736,18 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 40 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -2294,8 +3784,10 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc ; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: and a0, a7, a6 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload @@ -2307,15 +3799,17 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a4, a1, .LBB92_2 +; CHECK-NEXT: bltu a4, a1, .LBB128_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a1 -; 
CHECK-NEXT: .LBB92_2: +; CHECK-NEXT: .LBB128_2: ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 5 @@ -2333,14 +3827,18 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc ; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 40 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -2354,8 +3852,10 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64_unmasked(<vscale x 16 x double> ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: csrr a1, vlenb @@ -2389,10 +3889,10 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64_unmasked(<vscale x 16 x double> ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v16, v8, v24 -; CHECK-NEXT: bltu a4, a1, .LBB93_2 +; CHECK-NEXT: bltu a4, a1, .LBB129_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a1 -; CHECK-NEXT: .LBB93_2: +; CHECK-NEXT: .LBB129_2: ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 @@ -2404,8 
+3904,10 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64_unmasked(<vscale x 16 x double> ; CHECK-NEXT: vfmadd.vv v0, v24, v8 ; CHECK-NEXT: vmv.v.v v8, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -7161,8 +8663,12 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a3, 42 -; ZVFHMIN-NEXT: mul a2, a2, a3 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 2 +; ZVFHMIN-NEXT: add a3, a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 2 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: sub sp, sp, a2 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb ; ZVFHMIN-NEXT: vl8re16.v v24, (a0) @@ -7183,8 +8689,11 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a2, a0, 1 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: li a4, 25 -; ZVFHMIN-NEXT: mul a3, a3, a4 +; ZVFHMIN-NEXT: mv a4, a3 +; ZVFHMIN-NEXT: slli a3, a3, 3 +; ZVFHMIN-NEXT: add a4, a4, a3 +; ZVFHMIN-NEXT: slli a3, a3, 1 +; ZVFHMIN-NEXT: add a3, a3, a4 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill @@ -7197,10 +8706,10 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; ZVFHMIN-NEXT: addi a3, a3, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: mv a3, a1 -; ZVFHMIN-NEXT: bltu a1, a2, .LBB244_2 +; ZVFHMIN-NEXT: bltu a1, a2, .LBB280_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: 
.LBB244_2: +; ZVFHMIN-NEXT: .LBB280_2: ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 @@ -7230,8 +8739,11 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; ZVFHMIN-NEXT: addi a3, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: li a4, 25 -; ZVFHMIN-NEXT: mul a3, a3, a4 +; ZVFHMIN-NEXT: mv a4, a3 +; ZVFHMIN-NEXT: slli a3, a3, 3 +; ZVFHMIN-NEXT: add a4, a4, a3 +; ZVFHMIN-NEXT: slli a3, a3, 1 +; ZVFHMIN-NEXT: add a3, a3, a4 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload @@ -7258,8 +8770,11 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a2, 25 -; ZVFHMIN-NEXT: mul a0, a0, a2 +; ZVFHMIN-NEXT: mv a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a2, a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a2 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill @@ -7271,8 +8786,11 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a2, 25 -; ZVFHMIN-NEXT: mul a0, a0, a2 +; ZVFHMIN-NEXT: mv a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a2, a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a2 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload @@ -7284,8 +8802,12 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 ; ZVFHMIN-NEXT: 
vfncvt.f.f.w v12, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 42 -; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a1, a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret @@ -7307,8 +8829,10 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %v ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a3, 40 -; ZVFHMIN-NEXT: mul a2, a2, a3 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 2 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: sub sp, sp, a2 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; ZVFHMIN-NEXT: vl8re16.v v24, (a0) @@ -7316,8 +8840,10 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %v ; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; ZVFHMIN-NEXT: vxor.vx v0, v24, a0 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a0, a0, a2 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a2 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill @@ -7369,8 +8895,10 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %v ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a3, 24 -; ZVFHMIN-NEXT: mul a2, a2, a3 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v8, 
(a2) # Unknown-size Folded Reload @@ -7380,10 +8908,10 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %v ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a1, a0, .LBB245_2 +; ZVFHMIN-NEXT: bltu a1, a0, .LBB281_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: .LBB245_2: +; ZVFHMIN-NEXT: .LBB281_2: ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add a0, sp, a0 @@ -7404,8 +8932,10 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %v ; ZVFHMIN-NEXT: vfncvt.f.f.w v0, v24 ; ZVFHMIN-NEXT: vmv8r.v v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 40 -; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret @@ -7433,8 +8963,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16(<vscale x 32 x half> %va, half % ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill @@ -7456,15 +8988,17 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16(<vscale x 32 x half> %va, half % ; ZVFHMIN-NEXT: addi a3, a3, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: mv a3, a0 -; ZVFHMIN-NEXT: bltu a0, a2, .LBB246_2 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB282_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: .LBB246_2: +; ZVFHMIN-NEXT: .LBB282_2: ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: 
vmv4r.v v4, v12 ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 24 -; ZVFHMIN-NEXT: mul a4, a4, a5 +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload @@ -7505,8 +9039,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16(<vscale x 32 x half> %va, half % ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload @@ -7559,8 +9095,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_commute(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill @@ -7582,15 +9120,17 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_commute(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: addi a3, a3, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: mv a3, a0 -; ZVFHMIN-NEXT: bltu a0, a2, .LBB247_2 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB283_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: .LBB247_2: +; ZVFHMIN-NEXT: .LBB283_2: ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vmv4r.v v4, v12 ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 24 -; ZVFHMIN-NEXT: mul a4, a4, a5 +; ZVFHMIN-NEXT: 
slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload @@ -7626,8 +9166,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_commute(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload @@ -7685,8 +9227,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %v ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill @@ -7720,8 +9264,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %v ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload @@ -7743,10 +9289,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %v ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; 
ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a0, a1, .LBB248_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB284_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB248_2: +; ZVFHMIN-NEXT: .LBB284_2: ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 @@ -7754,8 +9300,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %v ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload @@ -7803,8 +9351,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v8, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill @@ -7838,8 +9388,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload @@ -7861,10 +9413,10 @@ define <vscale x 32 x half> 
@vfmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a0, a1, .LBB249_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB285_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB249_2: +; ZVFHMIN-NEXT: .LBB285_2: ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 @@ -7872,8 +9424,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload @@ -7919,8 +9473,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; ZVFHMIN-NEXT: vl8re16.v v24, (a0) ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a0, a0, a2 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a2 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill @@ -7945,13 +9501,15 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a1, a2, .LBB250_2 +; ZVFHMIN-NEXT: bltu a1, a2, .LBB286_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: .LBB250_2: +; 
ZVFHMIN-NEXT: .LBB286_2: ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 24 -; ZVFHMIN-NEXT: mul a4, a4, a5 +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload @@ -7986,8 +9544,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a0 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a0, a0, a2 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a2 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -8029,8 +9589,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_commuted(<vscale x 32 x half> % ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a3, 40 -; ZVFHMIN-NEXT: mul a2, a2, a3 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 2 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: sub sp, sp, a2 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; ZVFHMIN-NEXT: vl8re16.v v24, (a0) @@ -8061,15 +9623,17 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_commuted(<vscale x 32 x half> % ; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 24 -; ZVFHMIN-NEXT: mul a4, a4, a5 +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v8, 
(a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a1, a2, .LBB251_2 +; ZVFHMIN-NEXT: bltu a1, a2, .LBB287_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: .LBB251_2: +; ZVFHMIN-NEXT: .LBB287_2: ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 5 ; ZVFHMIN-NEXT: add a4, sp, a4 @@ -8077,8 +9641,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_commuted(<vscale x 32 x half> % ; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 24 -; ZVFHMIN-NEXT: mul a4, a4, a5 +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload @@ -8094,8 +9660,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_commuted(<vscale x 32 x half> % ; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a3, a3, a4 +; ZVFHMIN-NEXT: slli a3, a3, 3 +; ZVFHMIN-NEXT: mv a4, a3 +; ZVFHMIN-NEXT: slli a3, a3, 1 +; ZVFHMIN-NEXT: add a3, a3, a4 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill @@ -8125,8 +9693,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_commuted(<vscale x 32 x half> % ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a0, a0, a2 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a2 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -8143,8 +9713,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_commuted(<vscale 
x 32 x half> % ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 40 -; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret @@ -8171,8 +9743,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked(<vscale x 32 x half> % ; ZVFHMIN-NEXT: sub sp, sp, a2 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a3, 24 -; ZVFHMIN-NEXT: mul a2, a2, a3 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill @@ -8209,8 +9783,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked(<vscale x 32 x half> % ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload @@ -8238,13 +9814,15 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked(<vscale x 32 x half> % ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a1, a0, .LBB252_2 +; ZVFHMIN-NEXT: bltu a1, a0, .LBB288_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: .LBB252_2: +; ZVFHMIN-NEXT: .LBB288_2: ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li 
a2, 24 -; ZVFHMIN-NEXT: mul a0, a0, a2 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a2 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -8287,8 +9865,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked_commuted(<vscale x 32 ; ZVFHMIN-NEXT: sub sp, sp, a2 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a3, 24 -; ZVFHMIN-NEXT: mul a2, a2, a3 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill @@ -8325,8 +9905,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked_commuted(<vscale x 32 ; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload @@ -8354,13 +9936,15 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked_commuted(<vscale x 32 ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: bltu a1, a0, .LBB253_2 +; ZVFHMIN-NEXT: bltu a1, a0, .LBB289_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: .LBB253_2: +; ZVFHMIN-NEXT: .LBB289_2: ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a0, a0, a2 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv 
a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a2 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload @@ -8398,8 +9982,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16(<vscale x 32 x half> %va, half ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 40 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 @@ -8422,8 +10008,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16(<vscale x 32 x half> %va, half ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 24 -; ZVFHMIN-NEXT: mul a4, a4, a5 +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill @@ -8434,10 +10022,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16(<vscale x 32 x half> %va, half ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a0, a2, .LBB254_2 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB290_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: .LBB254_2: +; ZVFHMIN-NEXT: .LBB290_2: ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 5 ; ZVFHMIN-NEXT: add a4, sp, a4 @@ -8445,8 +10033,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16(<vscale x 32 x half> %va, half ; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload ; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 24 -; ZVFHMIN-NEXT: mul a4, a4, a5 +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload @@ -8467,8 +10057,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16(<vscale x 32 x half> %va, half ; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a3, a3, a4 +; ZVFHMIN-NEXT: slli a3, a3, 3 +; ZVFHMIN-NEXT: mv a4, a3 +; ZVFHMIN-NEXT: slli a3, a3, 1 +; ZVFHMIN-NEXT: add a3, a3, a4 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -8493,8 +10085,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16(<vscale x 32 x half> %va, half ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload @@ -8514,8 +10108,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16(<vscale x 32 x half> %va, half ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 40 -; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret @@ -8546,8 +10142,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_commute(<vscale x 32 x 
half> %v ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill @@ -8572,13 +10170,15 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_commute(<vscale x 32 x half> %v ; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vmv4r.v v4, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: bltu a0, a2, .LBB255_2 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB291_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: .LBB255_2: +; ZVFHMIN-NEXT: .LBB291_2: ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 24 -; ZVFHMIN-NEXT: mul a4, a4, a5 +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload @@ -8613,8 +10213,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_commute(<vscale x 32 x half> %v ; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload @@ -8674,8 +10276,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_unmasked(<vscale x 32 x half> % ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 
+; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill @@ -8711,8 +10315,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_unmasked(<vscale x 32 x half> % ; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload @@ -8740,13 +10346,15 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_unmasked(<vscale x 32 x half> % ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB256_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB292_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB256_2: +; ZVFHMIN-NEXT: .LBB292_2: ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload @@ -8793,8 +10401,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_unmasked_commute(<vscale x 32 x ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: 
addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill @@ -8830,8 +10440,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_unmasked_commute(<vscale x 32 x ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a2) # Unknown-size Folded Reload @@ -8860,13 +10472,15 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_unmasked_commute(<vscale x 32 x ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB257_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB293_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB257_2: +; ZVFHMIN-NEXT: .LBB293_2: ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload @@ -8910,8 +10524,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat(<vscale x 32 x half> ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill @@ -8939,13 
+10555,15 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat(<vscale x 32 x half> ; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vmv4r.v v4, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: bltu a0, a2, .LBB258_2 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB294_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: .LBB258_2: +; ZVFHMIN-NEXT: .LBB294_2: ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 24 -; ZVFHMIN-NEXT: mul a4, a4, a5 +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload @@ -8984,8 +10602,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat(<vscale x 32 x half> ; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload @@ -9032,8 +10652,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_commute(<vscale x 32 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill @@ -9063,13 +10685,15 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_commute(<vscale x 32 ; 
ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a0, a2, .LBB259_2 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB295_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: .LBB259_2: +; ZVFHMIN-NEXT: .LBB295_2: ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 24 -; ZVFHMIN-NEXT: mul a4, a4, a5 +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload @@ -9109,8 +10733,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_commute(<vscale x 32 ; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload @@ -9160,8 +10786,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked(<vscale x 32 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill @@ -9200,8 +10828,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked(<vscale x 32 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 ; 
ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a2) # Unknown-size Folded Reload @@ -9230,13 +10860,15 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked(<vscale x 32 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB260_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB296_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB260_2: +; ZVFHMIN-NEXT: .LBB296_2: ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload @@ -9280,8 +10912,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked_commute(<vsc ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill @@ -9320,8 +10954,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked_commute(<vsc ; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a2, a2, 
a4 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload @@ -9349,13 +10985,15 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked_commute(<vsc ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB261_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB297_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB261_2: +; ZVFHMIN-NEXT: .LBB297_2: ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload @@ -9402,8 +11040,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; ZVFHMIN-NEXT: vl8re16.v v24, (a0) ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a0, a0, a2 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a2 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill @@ -9428,13 +11068,15 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a1, a2, .LBB262_2 +; ZVFHMIN-NEXT: bltu a1, a2, .LBB298_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a3, a2 -; 
ZVFHMIN-NEXT: .LBB262_2: +; ZVFHMIN-NEXT: .LBB298_2: ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 24 -; ZVFHMIN-NEXT: mul a4, a4, a5 +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload @@ -9469,8 +11111,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a0 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a0, a0, a2 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a2 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -9512,8 +11156,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_commuted(<vscale x 32 x half> % ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a3, 40 -; ZVFHMIN-NEXT: mul a2, a2, a3 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 2 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: sub sp, sp, a2 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; ZVFHMIN-NEXT: vl8re16.v v24, (a0) @@ -9544,15 +11190,17 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_commuted(<vscale x 32 x half> % ; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 24 -; ZVFHMIN-NEXT: mul a4, a4, a5 +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 
; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a1, a2, .LBB263_2 +; ZVFHMIN-NEXT: bltu a1, a2, .LBB299_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: .LBB263_2: +; ZVFHMIN-NEXT: .LBB299_2: ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 5 ; ZVFHMIN-NEXT: add a4, sp, a4 @@ -9560,8 +11208,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_commuted(<vscale x 32 x half> % ; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 24 -; ZVFHMIN-NEXT: mul a4, a4, a5 +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload @@ -9577,8 +11227,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_commuted(<vscale x 32 x half> % ; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a3, a3, a4 +; ZVFHMIN-NEXT: slli a3, a3, 3 +; ZVFHMIN-NEXT: mv a4, a3 +; ZVFHMIN-NEXT: slli a3, a3, 1 +; ZVFHMIN-NEXT: add a3, a3, a4 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill @@ -9608,8 +11260,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_commuted(<vscale x 32 x half> % ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a0, a0, a2 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a2 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -9626,8 +11280,10 @@ define <vscale x 32 x half> 
@vfnmsub_vv_nxv32f16_commuted(<vscale x 32 x half> % ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 40 -; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret @@ -9654,8 +11310,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> % ; ZVFHMIN-NEXT: sub sp, sp, a2 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a3, 24 -; ZVFHMIN-NEXT: mul a2, a2, a3 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill @@ -9692,8 +11350,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> % ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload @@ -9721,13 +11381,15 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> % ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a1, a0, .LBB264_2 +; ZVFHMIN-NEXT: bltu a1, a0, .LBB300_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: .LBB264_2: +; ZVFHMIN-NEXT: .LBB300_2: ; 
ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a0, a0, a2 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a2 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -9770,8 +11432,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked_commuted(<vscale x 32 ; ZVFHMIN-NEXT: sub sp, sp, a2 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a3, 24 -; ZVFHMIN-NEXT: mul a2, a2, a3 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill @@ -9808,8 +11472,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked_commuted(<vscale x 32 ; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload @@ -9837,13 +11503,15 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked_commuted(<vscale x 32 ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: bltu a1, a0, .LBB265_2 +; ZVFHMIN-NEXT: bltu a1, a0, .LBB301_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: .LBB265_2: +; ZVFHMIN-NEXT: .LBB301_2: ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a0, a0, a2 
+; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a2 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload @@ -9881,8 +11549,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, half ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 40 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 @@ -9907,14 +11577,16 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, half ; ZVFHMIN-NEXT: addi a3, a3, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: mv a3, a0 -; ZVFHMIN-NEXT: bltu a0, a2, .LBB266_2 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB302_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: .LBB266_2: +; ZVFHMIN-NEXT: .LBB302_2: ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 24 -; ZVFHMIN-NEXT: mul a4, a4, a5 +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill @@ -9948,8 +11620,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, half ; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 
+; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload @@ -9984,8 +11658,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, half ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 40 -; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret @@ -10020,8 +11696,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_commute(<vscale x 32 x half> %v ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill @@ -10034,10 +11712,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_commute(<vscale x 32 x half> %v ; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: mv a3, a0 -; ZVFHMIN-NEXT: bltu a0, a2, .LBB267_2 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB303_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: .LBB267_2: +; ZVFHMIN-NEXT: .LBB303_2: ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 4 ; ZVFHMIN-NEXT: add a4, sp, a4 @@ -10050,8 +11728,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_commute(<vscale x 32 x half> %v ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 24 -; ZVFHMIN-NEXT: mul a4, a4, a5 +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv 
a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload @@ -10087,8 +11767,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_commute(<vscale x 32 x half> %v ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload @@ -10136,8 +11818,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> % ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill @@ -10172,8 +11856,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> % ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload @@ -10195,10 +11881,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> % ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) 
# Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a0, a1, .LBB268_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB304_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB268_2: +; ZVFHMIN-NEXT: .LBB304_2: ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 @@ -10206,8 +11892,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> % ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload @@ -10253,8 +11941,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill @@ -10289,8 +11979,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload @@ -10307,10 +11999,10 @@ define <vscale x 32 x half> 
@vfnmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB269_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB305_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB269_2: +; ZVFHMIN-NEXT: .LBB305_2: ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 @@ -10323,8 +12015,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload @@ -10366,8 +12060,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half> ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -10393,13 +12089,15 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half> ; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: mv a3, a0 -; ZVFHMIN-NEXT: bltu a0, a2, .LBB270_2 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB306_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: .LBB270_2: +; 
ZVFHMIN-NEXT: .LBB306_2: ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 24 -; ZVFHMIN-NEXT: mul a4, a4, a5 +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload @@ -10434,8 +12132,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half> ; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload @@ -10483,13 +12183,18 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_commute(<vscale x 32 ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 34 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 25 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: add a2, a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -10519,13 +12224,16 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_commute(<vscale x 32 ; ZVFHMIN-NEXT: addi a3, a3, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a3) # 
Unknown-size Folded Spill ; ZVFHMIN-NEXT: mv a3, a0 -; ZVFHMIN-NEXT: bltu a0, a2, .LBB271_2 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB307_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: .LBB271_2: +; ZVFHMIN-NEXT: .LBB307_2: ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 25 -; ZVFHMIN-NEXT: mul a4, a4, a5 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: add a5, a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload @@ -10576,8 +12284,11 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_commute(<vscale x 32 ; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 25 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: add a2, a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload @@ -10600,8 +12311,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_commute(<vscale x 32 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 34 -; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret @@ -10629,8 +12342,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked(<vscale x 32 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; ZVFHMIN-NEXT: vmv8r.v v24, v16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; 
ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill @@ -10668,8 +12383,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked(<vscale x 32 ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload @@ -10686,10 +12403,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked(<vscale x 32 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB272_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB308_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB272_2: +; ZVFHMIN-NEXT: .LBB308_2: ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 @@ -10702,8 +12419,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked(<vscale x 32 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload @@ -10745,8 +12464,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute(<vsc ; ZVFHMIN-NEXT: sub sp, 
sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill @@ -10784,8 +12505,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute(<vsc ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload @@ -10807,10 +12530,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute(<vsc ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a0, a1, .LBB273_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB309_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB273_2: +; ZVFHMIN-NEXT: .LBB309_2: ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 @@ -10818,8 +12541,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute(<vsc ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: 
addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll index 52e438013fdb..dea411348ce5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll @@ -1,16 +1,402 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 \ +; RUN: -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 \ +; RUN: -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN ; This tests a mix of vfmacc and vfmadd by using different operand orders to ; trigger commuting in 
TwoAddressInstructionPass. +define <vscale x 1 x bfloat> @vfmadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc) strictfp { +; CHECK-LABEL: vfmadd_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmadd.vv v9, v10, v11 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %vd = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fma.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <vscale x 1 x bfloat> %vd +} + +define <vscale x 1 x bfloat> @vfmadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, bfloat %c) strictfp { +; CHECK-LABEL: vfmadd_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmadd.vv v12, v9, v11 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x bfloat> poison, bfloat %c, i32 0 + %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %vd = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fma.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %splat, <vscale x 1 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <vscale x 1 x bfloat> %vd +} + + +define <vscale x 2 x bfloat> @vfmadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x 
bfloat> %vb, <vscale x 2 x bfloat> %vc) strictfp { +; CHECK-LABEL: vfmadd_vv_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmadd.vv v10, v9, v11 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %vd = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fma.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vc, <vscale x 2 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <vscale x 2 x bfloat> %vd +} + +define <vscale x 2 x bfloat> @vfmadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, bfloat %c) strictfp { +; CHECK-LABEL: vfmadd_vf_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmadd.vv v9, v8, v11 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x bfloat> poison, bfloat %c, i32 0 + %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer + %vd = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fma.nxv2bf16(<vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %splat, <vscale x 2 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <vscale x 2 x bfloat> %vd +} + + +define <vscale x 4 x bfloat> @vfmadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %vc) strictfp { +; CHECK-LABEL: vfmadd_vv_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: 
vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfmadd.vv v14, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v14 +; CHECK-NEXT: ret + %vd = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fma.nxv4bf16(<vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <vscale x 4 x bfloat> %vd +} + +define <vscale x 4 x bfloat> @vfmadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, bfloat %c) strictfp { +; CHECK-LABEL: vfmadd_vf_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfmadd.vv v16, v14, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x bfloat> poison, bfloat %c, i32 0 + %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %vd = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fma.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %splat, <vscale x 4 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <vscale x 4 x bfloat> %vd +} + + +define <vscale x 8 x bfloat> @vfmadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc) strictfp { +; CHECK-LABEL: vfmadd_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v12 +; 
CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfmadd.vv v12, v20, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %vd = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fma.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc, <vscale x 8 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <vscale x 8 x bfloat> %vd +} + +define <vscale x 8 x bfloat> @vfmadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, bfloat %c) strictfp { +; CHECK-LABEL: vfmadd_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfmadd.vv v24, v20, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %c, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vd = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fma.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %splat, <vscale x 8 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <vscale x 8 x bfloat> %vd +} + + +define <vscale x 16 x bfloat> @vfmadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x bfloat> %vc) strictfp { +; CHECK-LABEL: vfmadd_vv_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 
0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v24, v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %vd = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fma.nxv16bf16(<vscale x 16 x bfloat> %vc, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <vscale x 16 x bfloat> %vd +} + +define <vscale x 16 x bfloat> @vfmadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, bfloat %c) strictfp { +; CHECK-LABEL: vfmadd_vf_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 +; CHECK-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v16, v0, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; 
CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x bfloat> poison, bfloat %c, i32 0 + %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %vd = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fma.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %splat, <vscale x 16 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <vscale x 16 x bfloat> %vd +} + + +define <vscale x 32 x bfloat> @vfmadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %vc) strictfp { +; CHECK-LABEL: vfmadd_vv_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: vl8re16.v v0, (a0) +; CHECK-NEXT: vmv8r.v v24, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # 
Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v8, v0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v0, v16, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v16, v8, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0 +; CHECK-NEXT: 
vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %vd = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fma.nxv32bf16(<vscale x 32 x bfloat> %vc, <vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <vscale x 32 x bfloat> %vd +} + +define <vscale x 32 x bfloat> @vfmadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, bfloat %c) strictfp { +; CHECK-LABEL: vfmadd_vf_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: vmv8r.v v24, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli 
zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v0, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v16, v8, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0 +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x bfloat> poison, bfloat %c, i32 0 + %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %vd = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fma.nxv32bf16(<vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %splat, <vscale x 32 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <vscale x 32 x bfloat> %vd +} + declare <vscale x 1 x half> @llvm.experimental.constrained.fma.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x half>, metadata, metadata) define <vscale x 1 x half> @vfmadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc) strictfp { diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll index 
a80a943c2e1d..2df2212c43db 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll @@ -1,16 +1,573 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN ; This tests a mix of vfmacc and vfmadd by using different operand orders to ; trigger commuting in TwoAddressInstructionPass. 
+define <vscale x 1 x bfloat> @vfmadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc) { +; CHECK-LABEL: vfmadd_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmadd.vv v12, v10, v11 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %vd = call <vscale x 1 x bfloat> @llvm.fma.v1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc) + ret <vscale x 1 x bfloat> %vd +} + +define <vscale x 1 x bfloat> @vfmadd_vv_nxv1bf16_commuted(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc) { +; CHECK-LABEL: vfmadd_vv_nxv1bf16_commuted: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmadd.vv v9, v8, v11 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %vd = call <vscale x 1 x bfloat> @llvm.fma.v1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc, <vscale x 1 x bfloat> %va) + ret <vscale x 1 x bfloat> %vd +} + +define <vscale x 1 x bfloat> @vfmadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, bfloat %c) { +; CHECK-LABEL: vfmadd_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmadd.vv v12, v9, v11 +; CHECK-NEXT: vsetvli zero, 
zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x bfloat> poison, bfloat %c, i32 0 + %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %vd = call <vscale x 1 x bfloat> @llvm.fma.v1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %splat, <vscale x 1 x bfloat> %vb) + ret <vscale x 1 x bfloat> %vd +} + +declare <vscale x 2 x bfloat> @llvm.fma.v2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>) + +define <vscale x 2 x bfloat> @vfmadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %vc) { +; CHECK-LABEL: vfmadd_vv_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmadd.vv v12, v9, v11 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %vd = call <vscale x 2 x bfloat> @llvm.fma.v2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vc, <vscale x 2 x bfloat> %vb) + ret <vscale x 2 x bfloat> %vd +} + +define <vscale x 2 x bfloat> @vfmadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, bfloat %c) { +; CHECK-LABEL: vfmadd_vf_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmadd.vv v9, v8, v11 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x bfloat> poison, bfloat %c, i32 0 + %splat = shufflevector <vscale x 
2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer + %vd = call <vscale x 2 x bfloat> @llvm.fma.v2bf16(<vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %splat, <vscale x 2 x bfloat> %va) + ret <vscale x 2 x bfloat> %vd +} + +declare <vscale x 4 x bfloat> @llvm.fma.v4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>) + +define <vscale x 4 x bfloat> @vfmadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %vc) { +; CHECK-LABEL: vfmadd_vv_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfmadd.vv v14, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v14 +; CHECK-NEXT: ret + %vd = call <vscale x 4 x bfloat> @llvm.fma.v4bf16(<vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vc) + ret <vscale x 4 x bfloat> %vd +} + +define <vscale x 4 x bfloat> @vfmadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, bfloat %c) { +; CHECK-LABEL: vfmadd_vf_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfmadd.vv v16, v14, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x bfloat> poison, bfloat %c, i32 0 + %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %vd = call <vscale x 4 x bfloat> @llvm.fma.v4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> 
%splat, <vscale x 4 x bfloat> %vb) + ret <vscale x 4 x bfloat> %vd +} + +declare <vscale x 8 x bfloat> @llvm.fma.v8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>) + +define <vscale x 8 x bfloat> @vfmadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc) { +; CHECK-LABEL: vfmadd_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfmadd.vv v24, v20, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: ret + %vd = call <vscale x 8 x bfloat> @llvm.fma.v8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc, <vscale x 8 x bfloat> %va) + ret <vscale x 8 x bfloat> %vd +} + +define <vscale x 8 x bfloat> @vfmadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, bfloat %c) { +; CHECK-LABEL: vfmadd_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfmadd.vv v24, v20, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %c, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vd = call <vscale x 8 x bfloat> @llvm.fma.v8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %splat, <vscale x 8 x bfloat> %va) + ret <vscale x 8 x bfloat> %vd +} + +declare <vscale x 16 x bfloat> @llvm.fma.v16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 
x bfloat>) + +define <vscale x 16 x bfloat> @vfmadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x bfloat> %vc) { +; CHECK-LABEL: vfmadd_vv_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v16, v0, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %vd = call <vscale x 16 x bfloat> @llvm.fma.v16bf16(<vscale x 16 x bfloat> %vc, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb) + ret <vscale x 16 x bfloat> %vd +} + +define <vscale x 16 x bfloat> @vfmadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, bfloat %c) { +; CHECK-LABEL: vfmadd_vf_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 +; CHECK-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v16, v0, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x bfloat> poison, bfloat %c, i32 
0 + %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %vd = call <vscale x 16 x bfloat> @llvm.fma.v16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %splat, <vscale x 16 x bfloat> %vb) + ret <vscale x 16 x bfloat> %vd +} + +declare <vscale x 32 x bfloat> @llvm.fma.v32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x bfloat>) + +define <vscale x 32 x bfloat> @vfmadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %vc) { +; ZVFH-LABEL: vfmadd_vv_nxv32bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: addi sp, sp, -16 +; ZVFH-NEXT: .cfi_def_cfa_offset 16 +; ZVFH-NEXT: csrr a1, vlenb +; ZVFH-NEXT: slli a1, a1, 5 +; ZVFH-NEXT: sub sp, sp, a1 +; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; ZVFH-NEXT: vl8re16.v v0, (a0) +; ZVFH-NEXT: vmv8r.v v24, v16 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 4 +; ZVFH-NEXT: add a0, sp, a0 +; ZVFH-NEXT: addi a0, a0, 16 +; ZVFH-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFH-NEXT: vmv8r.v v16, v8 +; ZVFH-NEXT: addi a0, sp, 16 +; ZVFH-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v16 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: mv a1, a0 +; ZVFH-NEXT: slli a0, a0, 1 +; ZVFH-NEXT: add a0, a0, a1 +; ZVFH-NEXT: add a0, sp, a0 +; ZVFH-NEXT: addi a0, a0, 16 +; ZVFH-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v24 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: add a0, sp, a0 +; ZVFH-NEXT: addi a0, a0, 16 +; ZVFH-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; ZVFH-NEXT: vmv8r.v v8, v0 +; ZVFH-NEXT: vfwcvtbf16.f.f.v v0, v8 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: mv a1, a0 +; ZVFH-NEXT: slli a0, 
a0, 1 +; ZVFH-NEXT: add a0, a0, a1 +; ZVFH-NEXT: add a0, sp, a0 +; ZVFH-NEXT: addi a0, a0, 16 +; ZVFH-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: add a0, sp, a0 +; ZVFH-NEXT: addi a0, a0, 16 +; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFH-NEXT: vfmadd.vv v0, v16, v24 +; ZVFH-NEXT: addi a0, sp, 16 +; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v20 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: add a0, sp, a0 +; ZVFH-NEXT: addi a0, a0, 16 +; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 4 +; ZVFH-NEXT: add a0, sp, a0 +; ZVFH-NEXT: addi a0, a0, 16 +; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v20 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: mv a1, a0 +; ZVFH-NEXT: slli a0, a0, 1 +; ZVFH-NEXT: add a0, a0, a1 +; ZVFH-NEXT: add a0, sp, a0 +; ZVFH-NEXT: addi a0, a0, 16 +; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v12 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: mv a1, a0 +; ZVFH-NEXT: slli a0, a0, 1 +; ZVFH-NEXT: add a0, a0, a1 +; ZVFH-NEXT: add a0, sp, a0 +; ZVFH-NEXT: addi a0, a0, 16 +; ZVFH-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: add a0, sp, a0 +; ZVFH-NEXT: addi a0, a0, 16 +; ZVFH-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFH-NEXT: vfmadd.vv v16, v8, v24 +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v0 +; ZVFH-NEXT: vfncvtbf16.f.f.w v12, v16 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 5 +; 
ZVFH-NEXT: add sp, sp, a0 +; ZVFH-NEXT: addi sp, sp, 16 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmadd_vv_nxv32bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 5 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; ZVFHMIN-NEXT: vl8re16.v v0, (a0) +; ZVFHMIN-NEXT: vmv8r.v v24, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv8r.v v16, v8 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v24 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv8r.v v8, v0 +; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v0, v8 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; 
ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v20 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v20 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v12 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v0 +; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v12, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret + %vd = call <vscale x 32 x bfloat> @llvm.fma.v32bf16(<vscale x 32 x bfloat> %vc, <vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %va) + ret <vscale x 32 x bfloat> %vd +} + +define <vscale x 32 x bfloat> @vfmadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, bfloat %c) { +; ZVFH-LABEL: vfmadd_vf_nxv32bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: 
addi sp, sp, -16 +; ZVFH-NEXT: .cfi_def_cfa_offset 16 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: mv a1, a0 +; ZVFH-NEXT: slli a0, a0, 1 +; ZVFH-NEXT: add a0, a0, a1 +; ZVFH-NEXT: sub sp, sp, a0 +; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; ZVFH-NEXT: vmv8r.v v24, v16 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: add a0, sp, a0 +; ZVFH-NEXT: addi a0, a0, 16 +; ZVFH-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFH-NEXT: fmv.x.h a0, fa0 +; ZVFH-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFH-NEXT: vmv.v.x v16, a0 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 4 +; ZVFH-NEXT: add a0, sp, a0 +; ZVFH-NEXT: addi a0, a0, 16 +; ZVFH-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v8 +; ZVFH-NEXT: addi a0, sp, 16 +; ZVFH-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v24 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 4 +; ZVFH-NEXT: add a0, sp, a0 +; ZVFH-NEXT: addi a0, a0, 16 +; ZVFH-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: vfwcvtbf16.f.f.v v0, v24 +; ZVFH-NEXT: addi a0, sp, 16 +; ZVFH-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFH-NEXT: vfmadd.vv v0, v16, v24 +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v12 +; ZVFH-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: add a0, sp, a0 +; ZVFH-NEXT: addi a0, a0, 16 +; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v20 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 4 +; ZVFH-NEXT: add a0, sp, a0 +; ZVFH-NEXT: addi a0, a0, 16 +; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded 
Reload +; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v20 +; ZVFH-NEXT: addi a0, sp, 16 +; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFH-NEXT: vfmadd.vv v24, v8, v16 +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v0 +; ZVFH-NEXT: vfncvtbf16.f.f.w v12, v24 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: mv a1, a0 +; ZVFH-NEXT: slli a0, a0, 1 +; ZVFH-NEXT: add a0, a0, a1 +; ZVFH-NEXT: add sp, sp, a0 +; ZVFH-NEXT: addi sp, sp, 16 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmadd_vf_nxv32bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: sub sp, sp, a0 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; ZVFHMIN-NEXT: vmv8r.v v24, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v16, a0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v8 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v24 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v0, v24 +; ZVFHMIN-NEXT: addi a0, sp, 
16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v12 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v20 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v20 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v0 +; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v12, v24 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret + %head = insertelement <vscale x 32 x bfloat> poison, bfloat %c, i32 0 + %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %vd = call <vscale x 32 x bfloat> @llvm.fma.v32bf16(<vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %splat, <vscale x 32 x bfloat> %va) + ret <vscale x 32 x bfloat> %vd +} + declare <vscale x 1 x half> @llvm.fma.v1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x half>) define <vscale x 1 x half> @vfmadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll 
index caf37b7a0a12..b5604add6d25 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll @@ -1,12 +1,243 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN + +define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) { +; CHECK-LABEL: vfmax_nxv1bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmax.vv v9, v9, v10 +; CHECK-NEXT: vsetvli 
zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.maxnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vf(<vscale x 1 x bfloat> %a, bfloat %b) { +; CHECK-LABEL: vfmax_nxv1bf16_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmax.vv v9, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x bfloat> @llvm.maxnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %splat) + ret <vscale x 1 x bfloat> %v +} + +declare <vscale x 2 x bfloat> @llvm.maxnum.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>) + +define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { +; CHECK-LABEL: vfmax_nxv2bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmax.vv v9, v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.maxnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vf(<vscale x 2 x bfloat> %a, bfloat %b) { +; CHECK-LABEL: vfmax_nxv2bf16_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmax.vv v9, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x bfloat> @llvm.maxnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %splat) + ret <vscale x 2 x bfloat> %v +} + +declare <vscale x 4 x bfloat> @llvm.maxnum.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>) + +define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { +; CHECK-LABEL: vfmax_nxv4bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfmax.vv v10, v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.maxnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vf(<vscale x 4 x bfloat> %a, bfloat %b) { +; CHECK-LABEL: vfmax_nxv4bf16_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfmax.vv v10, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x 
bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x bfloat> @llvm.maxnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %splat) + ret <vscale x 4 x bfloat> %v +} + +declare <vscale x 8 x bfloat> @llvm.maxnum.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>) + +define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { +; CHECK-LABEL: vfmax_nxv8bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfmax.vv v12, v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.maxnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vf(<vscale x 8 x bfloat> %a, bfloat %b) { +; CHECK-LABEL: vfmax_nxv8bf16_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfmax.vv v12, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x bfloat> @llvm.maxnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %splat) + ret <vscale x 8 x bfloat> %v +} + +declare <vscale x 16 x bfloat> @llvm.maxnum.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x 
bfloat>) + +define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) { +; CHECK-LABEL: vfmax_nxv16bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmax.vv v16, v24, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.maxnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vf(<vscale x 16 x bfloat> %a, bfloat %b) { +; CHECK-LABEL: vfmax_nxv16bf16_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmax.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x bfloat> @llvm.maxnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %splat) + ret <vscale x 16 x bfloat> %v +} + +declare <vscale x 32 x bfloat> @llvm.maxnum.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>) + +define <vscale x 32 x bfloat> @vfmax_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) { +; CHECK-LABEL: vfmax_nxv32bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: 
vfmax.vv v24, v0, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmax.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.maxnum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfmax_nxv32bf16_vf(<vscale x 32 x bfloat> %a, bfloat %b) { +; CHECK-LABEL: vfmax_nxv32bf16_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmax.vv v24, v24, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v20 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmax.vv v16, v24, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x bfloat> @llvm.maxnum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %splat) + ret <vscale x 32 x bfloat> %v +} declare <vscale x 1 x half> @llvm.maxnum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll index 7ab999ea4fa7..6e38881b4d60 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll @@ -1,13 +1,278 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN + +declare <vscale x 1 x bfloat> @llvm.vp.maxnum.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x bfloat> @vfmax_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: 
vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfmax.vv v9, v9, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.maxnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfmax_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv1bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfmax.vv v9, v9, v10 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.maxnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +declare <vscale x 2 x bfloat> @llvm.vp.maxnum.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x bfloat> @vfmax_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfmax.vv v9, v9, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.maxnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, 
i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfmax_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv2bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfmax.vv v9, v9, v10 +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.maxnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +declare <vscale x 4 x bfloat> @llvm.vp.maxnum.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x bfloat> @vfmax_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmax.vv v10, v12, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.maxnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfmax_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv4bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmax.vv v10, 
v12, v10 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.maxnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +declare <vscale x 8 x bfloat> @llvm.vp.maxnum.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32) +define <vscale x 8 x bfloat> @vfmax_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfmax.vv v12, v16, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.maxnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfmax_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv8bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfmax.vv v12, v16, v12 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.maxnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +declare <vscale x 16 x bfloat> @llvm.vp.maxnum.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32) + 
+define <vscale x 16 x bfloat> @vfmax_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmax.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.maxnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfmax_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv16bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmax.vv v16, v24, v16 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.maxnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +declare <vscale x 32 x bfloat> @llvm.vp.maxnum.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 
0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfmax.vv v16, v16, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: bltu a0, a1, .LBB10_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmax.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.maxnum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmax_vv_nxv32bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, 
sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; CHECK-NEXT: vmset.m v24 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v24, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfmax.vv v16, v16, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: bltu a0, a1, .LBB11_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmax.vv v16, v24, v16 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.maxnum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} declare <vscale x 1 x half> @llvm.vp.maxnum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32) define <vscale x 1 x half> @vfmax_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { @@ -264,10 +529,10 
@@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB10_2: +; ZVFHMIN-NEXT: .LBB22_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 @@ -321,10 +586,10 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB11_2: +; ZVFHMIN-NEXT: .LBB23_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll index b47e14f4f26b..9212ddab5b1e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll @@ -1,12 +1,243 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < 
%s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN + +define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) { +; CHECK-LABEL: vfmin_nxv1bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmin.vv v9, v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.minnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vf(<vscale x 1 x bfloat> %a, bfloat %b) { +; CHECK-LABEL: vfmin_nxv1bf16_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmin.vv v9, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x bfloat> 
poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x bfloat> @llvm.minnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %splat) + ret <vscale x 1 x bfloat> %v +} + +declare <vscale x 2 x bfloat> @llvm.minnum.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>) + +define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) { +; CHECK-LABEL: vfmin_nxv2bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmin.vv v9, v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.minnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vf(<vscale x 2 x bfloat> %a, bfloat %b) { +; CHECK-LABEL: vfmin_nxv2bf16_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmin.vv v9, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x bfloat> @llvm.minnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %splat) + ret <vscale x 2 x bfloat> %v +} + +declare <vscale x 4 x bfloat> @llvm.minnum.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>) + +define 
<vscale x 4 x bfloat> @vfmin_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) { +; CHECK-LABEL: vfmin_nxv4bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfmin.vv v10, v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.minnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vf(<vscale x 4 x bfloat> %a, bfloat %b) { +; CHECK-LABEL: vfmin_nxv4bf16_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfmin.vv v10, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x bfloat> @llvm.minnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %splat) + ret <vscale x 4 x bfloat> %v +} + +declare <vscale x 8 x bfloat> @llvm.minnum.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>) + +define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) { +; CHECK-LABEL: vfmin_nxv8bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfmin.vv v12, v16, v12 +; CHECK-NEXT: vsetvli zero, 
zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.minnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vf(<vscale x 8 x bfloat> %a, bfloat %b) { +; CHECK-LABEL: vfmin_nxv8bf16_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfmin.vv v12, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x bfloat> @llvm.minnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %splat) + ret <vscale x 8 x bfloat> %v +} + +declare <vscale x 16 x bfloat> @llvm.minnum.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>) + +define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) { +; CHECK-LABEL: vfmin_nxv16bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmin.vv v16, v24, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.minnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vf(<vscale x 16 x bfloat> %a, bfloat %b) { +; CHECK-LABEL: vfmin_nxv16bf16_vf: +; CHECK: # %bb.0: +; 
CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmin.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x bfloat> @llvm.minnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %splat) + ret <vscale x 16 x bfloat> %v +} + +declare <vscale x 32 x bfloat> @llvm.minnum.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>) + +define <vscale x 32 x bfloat> @vfmin_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) { +; CHECK-LABEL: vfmin_nxv32bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmin.vv v24, v0, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmin.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.minnum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfmin_nxv32bf16_vf(<vscale x 32 x bfloat> %a, bfloat %b) { +; CHECK-LABEL: vfmin_nxv32bf16_vf: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmin.vv v24, v24, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v20 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmin.vv v16, v24, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x bfloat> @llvm.minnum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %splat) + ret <vscale x 32 x bfloat> %v +} declare <vscale x 1 x half> @llvm.minnum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll index e928df85b5bb..f1d6b2100ae9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll @@ -1,13 +1,278 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; NOTE: Assertions have 
been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN + +declare <vscale x 1 x bfloat> @llvm.vp.minnum.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x bfloat> @vfmin_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfmin.vv v9, v9, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.minnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfmin_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv1bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: 
vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfmin.vv v9, v9, v10 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.minnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +declare <vscale x 2 x bfloat> @llvm.vp.minnum.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x bfloat> @vfmin_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfmin.vv v9, v9, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.minnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfmin_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv2bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfmin.vv v9, v9, v10 +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.minnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +declare <vscale x 4 x bfloat> @llvm.vp.minnum.nxv4bf16(<vscale x 4 x bfloat>, 
<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x bfloat> @vfmin_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmin.vv v10, v12, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.minnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfmin_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv4bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmin.vv v10, v12, v10 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.minnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +declare <vscale x 8 x bfloat> @llvm.vp.minnum.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32) +define <vscale x 8 x bfloat> @vfmin_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfmin.vv 
v12, v16, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.minnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfmin_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv8bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfmin.vv v12, v16, v12 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.minnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +declare <vscale x 16 x bfloat> @llvm.vp.minnum.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x bfloat> @vfmin_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmin.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.minnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfmin_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, i32 
zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv16bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmin.vv v16, v24, v16 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.minnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +declare <vscale x 32 x bfloat> @llvm.vp.minnum.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfmin.vv v16, v16, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: bltu 
a0, a1, .LBB10_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmin.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.minnum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, i32 zeroext %evl) { +; CHECK-LABEL: vfmin_vv_nxv32bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; CHECK-NEXT: vmset.m v24 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v24, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfmin.vv v16, v16, v24, v0.t +; 
CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: bltu a0, a1, .LBB11_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmin.vv v16, v24, v16 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.minnum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} declare <vscale x 1 x half> @llvm.vp.minnum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32) define <vscale x 1 x half> @vfmin_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { @@ -264,10 +529,10 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB10_2: +; ZVFHMIN-NEXT: .LBB22_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 @@ -321,10 +586,10 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 +; ZVFHMIN-NEXT: bltu a0, a1, 
.LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB11_2: +; ZVFHMIN-NEXT: .LBB23_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-constrained-sdnode.ll index e82fdf065574..999b06ba5a57 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmul-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-constrained-sdnode.ll @@ -1,12 +1,239 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN + 
+define <vscale x 1 x bfloat> @vfmul_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfmul_vv_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmul.vv v9, v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fmul.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 1 x bfloat> %vc +} + +define <vscale x 1 x bfloat> @vfmul_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfmul_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmul.vv v9, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %vc = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fmul.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 1 x bfloat> %vc +} + +define <vscale x 2 x bfloat> @vfmul_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfmul_vv_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v 
v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmul.vv v9, v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fmul.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 2 x bfloat> %vc +} + +define <vscale x 2 x bfloat> @vfmul_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfmul_vf_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmul.vv v9, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer + %vc = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fmul.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 2 x bfloat> %vc +} + +define <vscale x 4 x bfloat> @vfmul_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfmul_vv_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfmul.vv v10, v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fmul.nxv4bf16(<vscale 
x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 4 x bfloat> %vc +} + +define <vscale x 4 x bfloat> @vfmul_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfmul_vf_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfmul.vv v10, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %vc = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fmul.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 4 x bfloat> %vc +} + +define <vscale x 8 x bfloat> @vfmul_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfmul_vv_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfmul.vv v12, v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fmul.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 8 x bfloat> @vfmul_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfmul_vf_nxv8bf16: +; CHECK: # %bb.0: +; 
CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfmul.vv v12, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fmul.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 16 x bfloat> @vfmul_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfmul_vv_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmul.vv v16, v24, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fmul.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 16 x bfloat> %vc +} + +define <vscale x 16 x bfloat> @vfmul_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfmul_vf_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmul.vv v16, v16, v24 +; 
CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %vc = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fmul.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 16 x bfloat> %vc +} + +define <vscale x 32 x bfloat> @vfmul_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfmul_vv_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmul.vv v24, v0, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmul.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fmul.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 32 x bfloat> %vc +} + +define <vscale x 32 x bfloat> @vfmul_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfmul_vf_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; 
CHECK-NEXT: vfmul.vv v24, v24, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmul.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %vc = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fmul.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 32 x bfloat> %vc +} declare <vscale x 1 x half> @llvm.experimental.constrained.fmul.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, metadata, metadata) define <vscale x 1 x half> @vfmul_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) strictfp { diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll index 70d664aa50ec..2ab04a45c818 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll @@ -1,12 +1,252 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | 
FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN + +define <vscale x 1 x bfloat> @vfmul_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb) { +; CHECK-LABEL: vfmul_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmul.vv v9, v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %vc = fmul <vscale x 1 x bfloat> %va, %vb + ret <vscale x 1 x bfloat> %vc +} + +define <vscale x 1 x bfloat> @vfmul_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfmul_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmul.vv v9, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 1 x 
bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %vc = fmul <vscale x 1 x bfloat> %va, %splat + ret <vscale x 1 x bfloat> %vc +} + +define <vscale x 2 x bfloat> @vfmul_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb) { +; CHECK-LABEL: vfmul_vv_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmul.vv v9, v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %vc = fmul <vscale x 2 x bfloat> %va, %vb + ret <vscale x 2 x bfloat> %vc +} + +define <vscale x 2 x bfloat> @vfmul_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfmul_vf_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmul.vv v9, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer + %vc = fmul <vscale x 2 x bfloat> %va, %splat + ret <vscale x 2 x bfloat> %vc +} + +define <vscale x 4 x bfloat> @vfmul_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb) { +; CHECK-LABEL: vfmul_vv_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfmul.vv v10, v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, 
v10 +; CHECK-NEXT: ret + %vc = fmul <vscale x 4 x bfloat> %va, %vb + ret <vscale x 4 x bfloat> %vc +} + +define <vscale x 4 x bfloat> @vfmul_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfmul_vf_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfmul.vv v10, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %vc = fmul <vscale x 4 x bfloat> %va, %splat + ret <vscale x 4 x bfloat> %vc +} + +define <vscale x 8 x bfloat> @vfmul_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) { +; CHECK-LABEL: vfmul_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfmul.vv v12, v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %vc = fmul <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 8 x bfloat> @vfmul_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfmul_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfmul.vv v12, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; 
CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fmul <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 8 x bfloat> @vfmul_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfmul_fv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfmul.vv v12, v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fmul <vscale x 8 x bfloat> %splat, %va + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 16 x bfloat> @vfmul_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb) { +; CHECK-LABEL: vfmul_vv_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmul.vv v16, v24, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %vc = fmul <vscale x 16 x bfloat> %va, %vb + ret <vscale x 16 x bfloat> %vc +} + +define <vscale x 16 x bfloat> @vfmul_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfmul_vf_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: 
vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmul.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %vc = fmul <vscale x 16 x bfloat> %va, %splat + ret <vscale x 16 x bfloat> %vc +} + +define <vscale x 32 x bfloat> @vfmul_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) { +; CHECK-LABEL: vfmul_vv_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmul.vv v24, v0, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmul.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: ret + %vc = fmul <vscale x 32 x bfloat> %va, %vb + ret <vscale x 32 x bfloat> %vc +} + +define <vscale x 32 x bfloat> @vfmul_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfmul_vf_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmul.vv v24, v24, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v20 +; 
CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmul.vv v16, v24, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %vc = fmul <vscale x 32 x bfloat> %va, %splat + ret <vscale x 32 x bfloat> %vc +} define <vscale x 1 x half> @vfmul_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) { ; ZVFH-LABEL: vfmul_vv_nxv1f16: diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-constrained-sdnode.ll index 806b817fd6c4..9da1e0a576d5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-constrained-sdnode.ll @@ -1,12 +1,110 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc 
-mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN + +define <vscale x 1 x bfloat> @vfsqrt_nxv1bf16(<vscale x 1 x bfloat> %v) strictfp { +; CHECK-LABEL: vfsqrt_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfsqrt.v v9, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %r = call <vscale x 1 x bfloat> @llvm.experimental.constrained.sqrt.nxv1bf16(<vscale x 1 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <vscale x 1 x bfloat> %r +} + + +define <vscale x 2 x bfloat> @vfsqrt_nxv2bf16(<vscale x 2 x bfloat> %v) strictfp { +; CHECK-LABEL: vfsqrt_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfsqrt.v v9, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %r = call <vscale x 2 x bfloat> @llvm.experimental.constrained.sqrt.nxv2bf16(<vscale x 2 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <vscale x 2 x bfloat> %r +} + + +define <vscale x 4 x bfloat> @vfsqrt_nxv4bf16(<vscale x 4 x bfloat> %v) strictfp { +; CHECK-LABEL: vfsqrt_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfsqrt.v v10, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, 
v10 +; CHECK-NEXT: ret + %r = call <vscale x 4 x bfloat> @llvm.experimental.constrained.sqrt.nxv4bf16(<vscale x 4 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <vscale x 4 x bfloat> %r +} + + +define <vscale x 8 x bfloat> @vfsqrt_nxv8bf16(<vscale x 8 x bfloat> %v) strictfp { +; CHECK-LABEL: vfsqrt_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfsqrt.v v12, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %r = call <vscale x 8 x bfloat> @llvm.experimental.constrained.sqrt.nxv8bf16(<vscale x 8 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <vscale x 8 x bfloat> %r +} + + +define <vscale x 16 x bfloat> @vfsqrt_nxv16bf16(<vscale x 16 x bfloat> %v) strictfp { +; CHECK-LABEL: vfsqrt_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfsqrt.v v16, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %r = call <vscale x 16 x bfloat> @llvm.experimental.constrained.sqrt.nxv16bf16(<vscale x 16 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <vscale x 16 x bfloat> %r +} + + +define <vscale x 32 x bfloat> @vfsqrt_nxv32bf16(<vscale x 32 x bfloat> %v) strictfp { +; CHECK-LABEL: vfsqrt_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfsqrt.v v16, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfsqrt.v v16, v16 +; 
CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: ret + %r = call <vscale x 32 x bfloat> @llvm.experimental.constrained.sqrt.nxv32bf16(<vscale x 32 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <vscale x 32 x bfloat> %r +} declare <vscale x 1 x half> @llvm.experimental.constrained.sqrt.nxv1f16(<vscale x 1 x half>, metadata, metadata) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll index 329a078cd166..de31a02cd154 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll @@ -1,12 +1,105 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 
-mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN + +define <vscale x 1 x bfloat> @vfsqrt_nxv1bf16(<vscale x 1 x bfloat> %v) { +; CHECK-LABEL: vfsqrt_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfsqrt.v v9, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %r = call <vscale x 1 x bfloat> @llvm.sqrt.nxv1bf16(<vscale x 1 x bfloat> %v) + ret <vscale x 1 x bfloat> %r +} + +define <vscale x 2 x bfloat> @vfsqrt_nxv2bf16(<vscale x 2 x bfloat> %v) { +; CHECK-LABEL: vfsqrt_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfsqrt.v v9, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %r = call <vscale x 2 x bfloat> @llvm.sqrt.nxv2bf16(<vscale x 2 x bfloat> %v) + ret <vscale x 2 x bfloat> %r +} + +define <vscale x 4 x bfloat> @vfsqrt_nxv4bf16(<vscale x 4 x bfloat> %v) { +; CHECK-LABEL: vfsqrt_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfsqrt.v v10, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %r = call <vscale x 4 x bfloat> @llvm.sqrt.nxv4bf16(<vscale x 4 x bfloat> %v) + ret <vscale x 4 x bfloat> %r +} + +define <vscale x 8 x bfloat> @vfsqrt_nxv8bf16(<vscale x 8 x bfloat> %v) { +; CHECK-LABEL: vfsqrt_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, 
ta, ma +; CHECK-NEXT: vfsqrt.v v12, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %r = call <vscale x 8 x bfloat> @llvm.sqrt.nxv8bf16(<vscale x 8 x bfloat> %v) + ret <vscale x 8 x bfloat> %r +} + +define <vscale x 16 x bfloat> @vfsqrt_nxv16bf16(<vscale x 16 x bfloat> %v) { +; CHECK-LABEL: vfsqrt_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfsqrt.v v16, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %r = call <vscale x 16 x bfloat> @llvm.sqrt.nxv16bf16(<vscale x 16 x bfloat> %v) + ret <vscale x 16 x bfloat> %r +} + +define <vscale x 32 x bfloat> @vfsqrt_nxv32bf16(<vscale x 32 x bfloat> %v) { +; CHECK-LABEL: vfsqrt_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfsqrt.v v16, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfsqrt.v v16, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: ret + %r = call <vscale x 32 x bfloat> @llvm.sqrt.nxv32bf16(<vscale x 32 x bfloat> %v) + ret <vscale x 32 x bfloat> %r +} declare <vscale x 1 x half> @llvm.sqrt.nxv1f16(<vscale x 1 x half>) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll index bd229e0220a4..574c2e052630 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll @@ -1,13 +1,236 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v 
-target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN + +declare <vscale x 1 x bfloat> @llvm.vp.sqrt.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x bfloat> @vfsqrt_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfsqrt_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfsqrt.v v9, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 
x bfloat> @llvm.vp.sqrt.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfsqrt_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vfsqrt_vv_nxv1bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfsqrt.v v9, v9 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.sqrt.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +declare <vscale x 2 x bfloat> @llvm.vp.sqrt.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x bfloat> @vfsqrt_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfsqrt_vv_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfsqrt.v v9, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.sqrt.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfsqrt_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vfsqrt_vv_nxv2bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfsqrt.v v9, v9 +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.sqrt.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x 
i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +declare <vscale x 4 x bfloat> @llvm.vp.sqrt.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x bfloat> @vfsqrt_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfsqrt_vv_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfsqrt.v v10, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.sqrt.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfsqrt_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vfsqrt_vv_nxv4bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfsqrt.v v10, v10 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.sqrt.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +declare <vscale x 8 x bfloat> @llvm.vp.sqrt.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x bfloat> @vfsqrt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfsqrt_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfsqrt.v v12, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> 
@llvm.vp.sqrt.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfsqrt_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vfsqrt_vv_nxv8bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfsqrt.v v12, v12 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.sqrt.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +declare <vscale x 16 x bfloat> @llvm.vp.sqrt.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x bfloat> @vfsqrt_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfsqrt_vv_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfsqrt.v v16, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.sqrt.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfsqrt_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vfsqrt_vv_nxv16bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfsqrt.v v16, v16 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.sqrt.nxv16bf16(<vscale x 16 x bfloat> %va, 
<vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +declare <vscale x 32 x bfloat> @llvm.vp.sqrt.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x bfloat> @vfsqrt_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfsqrt_vv_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfsqrt.v v24, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: bltu a0, a1, .LBB10_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfsqrt.v v16, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.sqrt.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} +define <vscale x 32 x bfloat> @vfsqrt_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) { +; CHECK-LABEL: vfsqrt_vv_nxv32bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli a4, zero, e8, 
mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v16, a2 +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfsqrt.v v16, v16, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: bltu a0, a1, .LBB11_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfsqrt.v v16, v16 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.sqrt.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} declare <vscale x 1 x half> @llvm.vp.sqrt.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32) define <vscale x 1 x half> @vfsqrt_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { @@ -245,10 +468,10 @@ define <vscale x 32 x half> @vfsqrt_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; ZVFHMIN-NEXT: vfsqrt.v v24, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB10_2: +; ZVFHMIN-NEXT: .LBB22_2: ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vmv1r.v v0, v16 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma @@ -286,10 +509,10 @@ define <vscale x 32 x half> @vfsqrt_vv_nxv32f16_unmasked(<vscale x 32 x half> %v ; ZVFHMIN-NEXT: vfsqrt.v v16, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB11_2: +; ZVFHMIN-NEXT: .LBB23_2: ; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfsqrt.v v16, v16 @@ -537,10 +760,10 @@ define <vscale x 16 x double> @vfsqrt_vv_nxv16f64(<vscale x 16 x double> %va, <v ; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfsqrt.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: .LBB44_2: ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t @@ -559,10 +782,10 @@ define <vscale x 16 x double> @vfsqrt_vv_nxv16f64_unmasked(<vscale x 16 x double ; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfsqrt.v v16, v16 -; CHECK-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-NEXT: bltu a0, a1, .LBB45_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-constrained-sdnode.ll index 5729dc4875ae..e40427a305f6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-constrained-sdnode.ll @@ -1,12 +1,258 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 
-mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN + +define <vscale x 1 x bfloat> @vfsub_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfsub_vv_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfsub.vv v9, v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fsub.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 1 x bfloat> %vc +} + +define <vscale x 1 x bfloat> @vfsub_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfsub_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, 
ta, ma +; CHECK-NEXT: vfsub.vv v9, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %vc = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fsub.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 1 x bfloat> %vc +} + +define <vscale x 2 x bfloat> @vfsub_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfsub_vv_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfsub.vv v9, v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fsub.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 2 x bfloat> %vc +} + +define <vscale x 2 x bfloat> @vfsub_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfsub_vf_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfsub.vv v9, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> 
poison, <vscale x 2 x i32> zeroinitializer + %vc = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fsub.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 2 x bfloat> %vc +} + +define <vscale x 4 x bfloat> @vfsub_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfsub_vv_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfsub.vv v10, v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fsub.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 4 x bfloat> %vc +} + +define <vscale x 4 x bfloat> @vfsub_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfsub_vf_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfsub.vv v10, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %vc = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fsub.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 4 x bfloat> %vc +} + +define <vscale x 8 x 
bfloat> @vfsub_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfsub_vv_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfsub.vv v12, v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fsub.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 8 x bfloat> @vfsub_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfsub_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfsub.vv v12, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fsub.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 8 x bfloat> @vfsub_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfsub_fv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: 
vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfsub.vv v12, v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fsub.nxv8bf16(<vscale x 8 x bfloat> %splat, <vscale x 8 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 16 x bfloat> @vfsub_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfsub_vv_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v16, v24, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fsub.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 16 x bfloat> %vc +} + +define <vscale x 16 x bfloat> @vfsub_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfsub_vf_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x bfloat> poison, 
bfloat %b, i32 0 + %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %vc = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fsub.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 16 x bfloat> %vc +} + +define <vscale x 32 x bfloat> @vfsub_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) strictfp { +; CHECK-LABEL: vfsub_vv_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v24, v0, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: ret +entry: + %vc = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fsub.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 32 x bfloat> %vc +} + +define <vscale x 32 x bfloat> @vfsub_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) strictfp { +; CHECK-LABEL: vfsub_vf_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v24, v24, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, 
v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %vc = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fsub.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret <vscale x 32 x bfloat> %vc +} declare <vscale x 1 x half> @llvm.experimental.constrained.fsub.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, metadata, metadata) define <vscale x 1 x half> @vfsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) strictfp { diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll index bd73398fd04b..e56cfd9ee4eb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll @@ -1,12 +1,252 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs 
< %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN + +define <vscale x 1 x bfloat> @vfsub_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb) { +; CHECK-LABEL: vfsub_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfsub.vv v9, v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %vc = fsub <vscale x 1 x bfloat> %va, %vb + ret <vscale x 1 x bfloat> %vc +} + +define <vscale x 1 x bfloat> @vfsub_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfsub_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfsub.vv v9, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %vc = fsub <vscale x 1 x bfloat> %va, %splat + ret <vscale x 1 x bfloat> %vc +} + +define 
<vscale x 2 x bfloat> @vfsub_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb) { +; CHECK-LABEL: vfsub_vv_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfsub.vv v9, v9, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %vc = fsub <vscale x 2 x bfloat> %va, %vb + ret <vscale x 2 x bfloat> %vc +} + +define <vscale x 2 x bfloat> @vfsub_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfsub_vf_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfsub.vv v9, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer + %vc = fsub <vscale x 2 x bfloat> %va, %splat + ret <vscale x 2 x bfloat> %vc +} + +define <vscale x 4 x bfloat> @vfsub_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb) { +; CHECK-LABEL: vfsub_vv_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfsub.vv v10, v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %vc = fsub <vscale x 4 x bfloat> %va, %vb + ret <vscale x 4 x bfloat> %vc +} + +define <vscale x 4 x bfloat> @vfsub_vf_nxv4bf16(<vscale x 4 x 
bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfsub_vf_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfsub.vv v10, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %vc = fsub <vscale x 4 x bfloat> %va, %splat + ret <vscale x 4 x bfloat> %vc +} + +define <vscale x 8 x bfloat> @vfsub_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) { +; CHECK-LABEL: vfsub_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfsub.vv v12, v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %vc = fsub <vscale x 8 x bfloat> %va, %vb + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 8 x bfloat> @vfsub_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfsub_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfsub.vv v12, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> 
poison, <vscale x 8 x i32> zeroinitializer + %vc = fsub <vscale x 8 x bfloat> %va, %splat + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 8 x bfloat> @vfsub_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfsub_fv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfsub.vv v12, v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %vc = fsub <vscale x 8 x bfloat> %splat, %va + ret <vscale x 8 x bfloat> %vc +} + +define <vscale x 16 x bfloat> @vfsub_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb) { +; CHECK-LABEL: vfsub_vv_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v16, v24, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %vc = fsub <vscale x 16 x bfloat> %va, %vb + ret <vscale x 16 x bfloat> %vc +} + +define <vscale x 16 x bfloat> @vfsub_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfsub_vf_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; 
CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %vc = fsub <vscale x 16 x bfloat> %va, %splat + ret <vscale x 16 x bfloat> %vc +} + +define <vscale x 32 x bfloat> @vfsub_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) { +; CHECK-LABEL: vfsub_vv_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v24, v0, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v16, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: ret + %vc = fsub <vscale x 32 x bfloat> %va, %vb + ret <vscale x 32 x bfloat> %vc +} + +define <vscale x 32 x bfloat> @vfsub_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) { +; CHECK-LABEL: vfsub_vf_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v24, v24, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v20 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v16, v24, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 
+; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0 + %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %vc = fsub <vscale x 32 x bfloat> %va, %splat + ret <vscale x 32 x bfloat> %vc +} define <vscale x 1 x half> @vfsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) { ; ZVFH-LABEL: vfsub_vv_nxv1f16: diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll index fda6d0c48d4a..449130e59876 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll @@ -1,13 +1,622 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: 
--check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN + +declare <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x bfloat> @vfsub_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfsub.vv v9, v9, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfsub_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vv_nxv1bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfsub.vv v9, v9, v10 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfsub_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli 
a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfsub.vv v9, v10, v8, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfsub_vf_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, bfloat %b, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vf_nxv1bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfsub.vv v9, v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +declare <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x bfloat> @vfsub_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vv_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: 
vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfsub.vv v9, v9, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfsub_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vv_nxv2bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfsub.vv v9, v9, v10 +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfsub_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vf_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfsub.vv v9, v10, v8, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x bfloat> 
@llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfsub_vf_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, bfloat %b, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vf_nxv2bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfsub.vv v9, v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +declare <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x bfloat> @vfsub_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vv_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfsub.vv v10, v12, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfsub_vv_nxv4bf16_unmasked(<vscale x 4 x 
bfloat> %va, <vscale x 4 x bfloat> %b, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vv_nxv4bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfsub.vv v10, v12, v10 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfsub_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vf_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfsub.vv v10, v10, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfsub_vf_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, bfloat %b, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vf_nxv4bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, 
ma +; CHECK-NEXT: vfsub.vv v10, v10, v12 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +declare <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32) +define <vscale x 8 x bfloat> @vfsub_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfsub.vv v12, v16, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfsub_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vv_nxv8bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfsub.vv v12, v16, v12 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, 
<vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfsub_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfsub.vv v12, v12, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfsub_vf_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, bfloat %b, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vf_nxv8bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfsub.vv v12, v12, v16 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +declare 
<vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x bfloat> @vfsub_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vv_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfsub_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vv_nxv16bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v16, v24, v16 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfsub_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vf_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; 
CHECK-NEXT: vfsub.vv v16, v16, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfsub_vf_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, bfloat %b, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vf_nxv16bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v16, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +declare <vscale x 32 x bfloat> @llvm.vp.fsub.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x bfloat> @vfsub_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vv_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; 
CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v16, v16, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: bltu a0, a1, .LBB20_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB20_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.fsub.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfsub_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vv_nxv32bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; 
CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; CHECK-NEXT: vmset.m v24 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v24, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v16, v16, v24, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: bltu a0, a1, .LBB21_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB21_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v16, v24, v16 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.fsub.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfsub_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vf_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; 
CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; CHECK-NEXT: vmv8r.v v24, v8 +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 3 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a4, a2, 3 +; CHECK-NEXT: add a2, a4, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v16, v8, v16, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: bltu a0, a1, .LBB22_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB22_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload 
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v16, v16, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x bfloat> @llvm.vp.fsub.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfsub_vf_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, bfloat %b, i32 zeroext %evl) { +; CHECK-LABEL: vfsub_vf_nxv32bf16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: sltu 
a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: vmset.m v24 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v24, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vmv4r.v v16, v8 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: bltu a0, a1, .LBB23_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB23_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfsub.vv v16, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0 + %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x bfloat> @llvm.vp.fsub.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x 
bfloat> %v +} declare <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32) define <vscale x 1 x half> @vfsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { @@ -514,10 +1123,10 @@ define <vscale x 32 x half> @vfsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB20_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB44_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB20_2: +; ZVFHMIN-NEXT: .LBB44_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 @@ -571,10 +1180,10 @@ define <vscale x 32 x half> @vfsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB21_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB45_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB21_2: +; ZVFHMIN-NEXT: .LBB45_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 @@ -649,10 +1258,10 @@ define <vscale x 32 x half> @vfsub_vf_nxv32f16(<vscale x 32 x half> %va, half %b ; ZVFHMIN-NEXT: vfsub.vv v16, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB46_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB22_2: +; ZVFHMIN-NEXT: .LBB46_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 @@ -730,10 +1339,10 @@ define <vscale x 32 x half> 
@vfsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: vfsub.vv v16, v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB47_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB23_2: +; ZVFHMIN-NEXT: .LBB47_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll index 7d78fa5a8f3e..0f8e74942d58 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll @@ -1,12 +1,288 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; 
RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFHMIN + +declare bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32) + +define bfloat @vpreduce_fadd_nxv1bf16(bfloat %s, <vscale x 1 x bfloat> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fadd_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfredusum.vs v8, v9, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %r = call reassoc bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat %s, <vscale x 1 x bfloat> %v, <vscale x 1 x i1> %m, i32 %evl) + ret bfloat %r +} + +define bfloat @vpreduce_ord_fadd_nxv1bf16(bfloat %s, <vscale x 1 x bfloat> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_ord_fadd_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfredosum.vs v8, v9, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %r = call bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat %s, <vscale x 1 x bfloat> %v, <vscale x 1 x i1> %m, i32 %evl) + ret bfloat %r +} + +declare bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32) + +define bfloat @vpreduce_fadd_nxv2bf16(bfloat %s, <vscale x 2 x bfloat> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: 
vpreduce_fadd_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfredusum.vs v8, v9, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %r = call reassoc bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat %s, <vscale x 2 x bfloat> %v, <vscale x 2 x i1> %m, i32 %evl) + ret bfloat %r +} + +define bfloat @vpreduce_ord_fadd_nxv2bf16(bfloat %s, <vscale x 2 x bfloat> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_ord_fadd_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfredosum.vs v8, v9, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %r = call bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat %s, <vscale x 2 x bfloat> %v, <vscale x 2 x i1> %m, i32 %evl) + ret bfloat %r +} + +declare bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32) + +define bfloat @vpreduce_fadd_nxv4bf16(bfloat %s, <vscale x 4 x bfloat> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fadd_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredusum.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %r = call reassoc bfloat 
@llvm.vp.reduce.fadd.nxv4bf16(bfloat %s, <vscale x 4 x bfloat> %v, <vscale x 4 x i1> %m, i32 %evl) + ret bfloat %r +} + +define bfloat @vpreduce_ord_fadd_nxv4bf16(bfloat %s, <vscale x 4 x bfloat> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_ord_fadd_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredosum.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %r = call bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat %s, <vscale x 4 x bfloat> %v, <vscale x 4 x i1> %m, i32 %evl) + ret bfloat %r +} + +declare bfloat @llvm.vp.reduce.fadd.nxv64bf16(bfloat, <vscale x 64 x bfloat>, <vscale x 64 x i1>, i32) + +define bfloat @vpreduce_fadd_nxv64bf16(bfloat %s, <vscale x 64 x bfloat> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fadd_nxv64bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: srli a1, a3, 1 +; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vx v7, v0, a1 +; CHECK-NEXT: slli a5, a3, 2 +; CHECK-NEXT: sub a1, a0, a5 +; CHECK-NEXT: sltu a2, a0, a1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a1, a2, a1 +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: sub a2, a1, a4 +; CHECK-NEXT: sltu a6, a1, a2 +; CHECK-NEXT: bltu a1, a4, .LBB6_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, a4 +; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: bltu a0, a5, .LBB6_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a0, a5 +; CHECK-NEXT: .LBB6_4: +; CHECK-NEXT: and a2, a6, a2 +; CHECK-NEXT: sub a5, a0, a4 +; CHECK-NEXT: sltu a6, a0, a5 +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: and a5, a6, a5 +; CHECK-NEXT: srli a3, a3, 2 +; CHECK-NEXT: vsetvli a6, zero, e8, mf2, ta, ma +; 
CHECK-NEXT: vslidedown.vx v6, v0, a3 +; CHECK-NEXT: bltu a0, a4, .LBB6_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: mv a0, a4 +; CHECK-NEXT: .LBB6_6: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfredusum.vs v8, v24, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a5, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfredusum.vs v8, v24, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfredusum.vs v8, v24, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v7, a3 +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfredusum.vs v8, v24, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %r = call reassoc bfloat @llvm.vp.reduce.fadd.nxv64bf16(bfloat %s, <vscale x 64 x bfloat> %v, <vscale x 64 x i1> %m, i32 %evl) + ret bfloat %r +} + +define bfloat @vpreduce_ord_fadd_nxv64bf16(bfloat 
%s, <vscale x 64 x bfloat> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_ord_fadd_nxv64bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: srli a1, a3, 1 +; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vx v7, v0, a1 +; CHECK-NEXT: slli a5, a3, 2 +; CHECK-NEXT: sub a1, a0, a5 +; CHECK-NEXT: sltu a2, a0, a1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a1, a2, a1 +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: sub a2, a1, a4 +; CHECK-NEXT: sltu a6, a1, a2 +; CHECK-NEXT: bltu a1, a4, .LBB7_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, a4 +; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: bltu a0, a5, .LBB7_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a0, a5 +; CHECK-NEXT: .LBB7_4: +; CHECK-NEXT: and a2, a6, a2 +; CHECK-NEXT: sub a5, a0, a4 +; CHECK-NEXT: sltu a6, a0, a5 +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: and a5, a6, a5 +; CHECK-NEXT: srli a3, a3, 2 +; CHECK-NEXT: vsetvli a6, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v6, v0, a3 +; CHECK-NEXT: bltu a0, a4, .LBB7_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: mv a0, a4 +; CHECK-NEXT: .LBB7_6: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfredosum.vs v8, v24, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a5, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfredosum.vs v8, v24, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: vsetivli zero, 1, 
e32, m8, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfredosum.vs v8, v24, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v7, a3 +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfredosum.vs v8, v24, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %r = call bfloat @llvm.vp.reduce.fadd.nxv64bf16(bfloat %s, <vscale x 64 x bfloat> %v, <vscale x 64 x i1> %m, i32 %evl) + ret bfloat %r +} declare half @llvm.vp.reduce.fadd.nxv1f16(half, <vscale x 1 x half>, <vscale x 1 x i1>, i32) @@ -184,10 +460,10 @@ define half @vpreduce_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x ; ZVFH-NEXT: sltu a3, a0, a1 ; ZVFH-NEXT: addi a3, a3, -1 ; ZVFH-NEXT: and a1, a3, a1 -; ZVFH-NEXT: bltu a0, a2, .LBB6_2 +; ZVFH-NEXT: bltu a0, a2, .LBB14_2 ; ZVFH-NEXT: # %bb.1: ; ZVFH-NEXT: mv a0, a2 -; ZVFH-NEXT: .LBB6_2: +; ZVFH-NEXT: .LBB14_2: ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.s.f v25, fa0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma @@ -212,15 +488,15 @@ define half @vpreduce_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x ; ZVFHMIN-NEXT: slli a4, a3, 1 ; ZVFHMIN-NEXT: sub a2, a1, a4 ; ZVFHMIN-NEXT: sltu a6, a1, a2 -; ZVFHMIN-NEXT: bltu a1, a4, .LBB6_2 +; ZVFHMIN-NEXT: bltu a1, a4, .LBB14_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a1, a4 -; ZVFHMIN-NEXT: .LBB6_2: +; ZVFHMIN-NEXT: .LBB14_2: ; ZVFHMIN-NEXT: addi a6, a6, -1 -; ZVFHMIN-NEXT: bltu a0, a5, .LBB6_4 +; ZVFHMIN-NEXT: bltu a0, 
a5, .LBB14_4 ; ZVFHMIN-NEXT: # %bb.3: ; ZVFHMIN-NEXT: mv a0, a5 -; ZVFHMIN-NEXT: .LBB6_4: +; ZVFHMIN-NEXT: .LBB14_4: ; ZVFHMIN-NEXT: and a2, a6, a2 ; ZVFHMIN-NEXT: sub a5, a0, a4 ; ZVFHMIN-NEXT: sltu a6, a0, a5 @@ -229,10 +505,10 @@ define half @vpreduce_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x ; ZVFHMIN-NEXT: srli a3, a3, 2 ; ZVFHMIN-NEXT: vsetvli a6, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v6, v0, a3 -; ZVFHMIN-NEXT: bltu a0, a4, .LBB6_6 +; ZVFHMIN-NEXT: bltu a0, a4, .LBB14_6 ; ZVFHMIN-NEXT: # %bb.5: ; ZVFHMIN-NEXT: mv a0, a4 -; ZVFHMIN-NEXT: .LBB6_6: +; ZVFHMIN-NEXT: .LBB14_6: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 @@ -290,10 +566,10 @@ define half @vpreduce_ord_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscal ; ZVFH-NEXT: sltu a3, a0, a1 ; ZVFH-NEXT: addi a3, a3, -1 ; ZVFH-NEXT: and a1, a3, a1 -; ZVFH-NEXT: bltu a0, a2, .LBB7_2 +; ZVFH-NEXT: bltu a0, a2, .LBB15_2 ; ZVFH-NEXT: # %bb.1: ; ZVFH-NEXT: mv a0, a2 -; ZVFH-NEXT: .LBB7_2: +; ZVFH-NEXT: .LBB15_2: ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.s.f v25, fa0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma @@ -318,15 +594,15 @@ define half @vpreduce_ord_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscal ; ZVFHMIN-NEXT: slli a4, a3, 1 ; ZVFHMIN-NEXT: sub a2, a1, a4 ; ZVFHMIN-NEXT: sltu a6, a1, a2 -; ZVFHMIN-NEXT: bltu a1, a4, .LBB7_2 +; ZVFHMIN-NEXT: bltu a1, a4, .LBB15_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a1, a4 -; ZVFHMIN-NEXT: .LBB7_2: +; ZVFHMIN-NEXT: .LBB15_2: ; ZVFHMIN-NEXT: addi a6, a6, -1 -; ZVFHMIN-NEXT: bltu a0, a5, .LBB7_4 +; ZVFHMIN-NEXT: bltu a0, a5, .LBB15_4 ; ZVFHMIN-NEXT: # %bb.3: ; ZVFHMIN-NEXT: mv a0, a5 -; ZVFHMIN-NEXT: .LBB7_4: +; ZVFHMIN-NEXT: .LBB15_4: ; ZVFHMIN-NEXT: and a2, a6, a2 ; ZVFHMIN-NEXT: sub a5, a0, a4 ; ZVFHMIN-NEXT: sltu a6, a0, a5 @@ -335,10 +611,10 @@ define half @vpreduce_ord_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, 
<vscal ; ZVFHMIN-NEXT: srli a3, a3, 2 ; ZVFHMIN-NEXT: vsetvli a6, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v6, v0, a3 -; ZVFHMIN-NEXT: bltu a0, a4, .LBB7_6 +; ZVFHMIN-NEXT: bltu a0, a4, .LBB15_6 ; ZVFHMIN-NEXT: # %bb.5: ; ZVFHMIN-NEXT: mv a0, a4 -; ZVFHMIN-NEXT: .LBB7_6: +; ZVFHMIN-NEXT: .LBB15_6: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 @@ -592,12 +868,12 @@ define float @vreduce_fminimum_nxv4f32(float %start, <vscale x 4 x float> %val, ; CHECK-NEXT: feq.s a1, fa0, fa0 ; CHECK-NEXT: xori a1, a1, 1 ; CHECK-NEXT: or a0, a0, a1 -; CHECK-NEXT: beqz a0, .LBB22_2 +; CHECK-NEXT: beqz a0, .LBB30_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB22_2: +; CHECK-NEXT: .LBB30_2: ; CHECK-NEXT: vfmv.f.s fa0, v10 ; CHECK-NEXT: ret %s = call float @llvm.vp.reduce.fminimum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl) @@ -616,12 +892,12 @@ define float @vreduce_fmaximum_nxv4f32(float %start, <vscale x 4 x float> %val, ; CHECK-NEXT: feq.s a1, fa0, fa0 ; CHECK-NEXT: xori a1, a1, 1 ; CHECK-NEXT: or a0, a0, a1 -; CHECK-NEXT: beqz a0, .LBB23_2 +; CHECK-NEXT: beqz a0, .LBB31_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB23_2: +; CHECK-NEXT: .LBB31_2: ; CHECK-NEXT: vfmv.f.s fa0, v10 ; CHECK-NEXT: ret %s = call float @llvm.vp.reduce.fmaximum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl) @@ -666,12 +942,12 @@ define float @vreduce_fminimum_v4f32(float %start, <4 x float> %val, <4 x i1> %m ; CHECK-NEXT: feq.s a1, fa0, fa0 ; CHECK-NEXT: xori a1, a1, 1 ; CHECK-NEXT: or a0, a0, a1 -; CHECK-NEXT: beqz a0, .LBB26_2 +; CHECK-NEXT: beqz a0, .LBB34_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: .LBB34_2: 
; CHECK-NEXT: vfmv.f.s fa0, v9 ; CHECK-NEXT: ret %s = call float @llvm.vp.reduce.fminimum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl) @@ -690,12 +966,12 @@ define float @vreduce_fmaximum_v4f32(float %start, <4 x float> %val, <4 x i1> %m ; CHECK-NEXT: feq.s a1, fa0, fa0 ; CHECK-NEXT: xori a1, a1, 1 ; CHECK-NEXT: or a0, a0, a1 -; CHECK-NEXT: beqz a0, .LBB27_2 +; CHECK-NEXT: beqz a0, .LBB35_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: .LBB35_2: ; CHECK-NEXT: vfmv.f.s fa0, v9 ; CHECK-NEXT: ret %s = call float @llvm.vp.reduce.fmaximum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/countbits.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/countbits.ll new file mode 100644 index 000000000000..57ec0bda2e18 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/countbits.ll @@ -0,0 +1,21 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpMemoryModel Logical GLSL450 + +define noundef i32 @countbits_i32(i32 noundef %a) { +entry: +; CHECK: %[[#]] = OpBitCount %[[#]] %[[#]] + %elt.bitreverse = call i32 @llvm.ctpop.i32(i32 %a) + ret i32 %elt.bitreverse +} + +define noundef i16 @countbits_i16(i16 noundef %a) { +entry: +; CHECK: %[[#]] = OpBitCount %[[#]] %[[#]] + %elt.ctpop = call i16 @llvm.ctpop.i16(i16 %a) + ret i16 %elt.ctpop +} + +declare i16 @llvm.ctpop.i16(i16) +declare i32 @llvm.ctpop.i32(i32) diff --git a/llvm/test/CodeGen/WebAssembly/exception.ll b/llvm/test/CodeGen/WebAssembly/exception.ll index 7259761d6313..1ad4c84f1c02 100644 --- a/llvm/test/CodeGen/WebAssembly/exception.ll +++ b/llvm/test/CodeGen/WebAssembly/exception.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -asm-verbose=false -wasm-enable-eh -exception-model=wasm 
-mattr=+exception-handling -wasm-enable-exnref -verify-machineinstrs | FileCheck --implicit-check-not=ehgcr -allow-deprecated-dag-overlap %s ; RUN: llc < %s -asm-verbose=false -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -wasm-enable-exnref -verify-machineinstrs -O0 ; RUN: llc < %s -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -wasm-enable-exnref +; RUN: llc < %s -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -wasm-enable-exnref -filetype=obj target triple = "wasm32-unknown-unknown" diff --git a/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll new file mode 100644 index 000000000000..a7ca23792e6f --- /dev/null +++ b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX102 +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefixes=NOAVX512MOVZXC + +define <4 x i32> @test_mm_move_epi32(<4 x i32> %a0) nounwind { +; AVX102-LABEL: test_mm_move_epi32: +; AVX102: # %bb.0: +; AVX102-NEXT: vmovd %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xc0] +; AVX102-NEXT: retq # encoding: [0xc3] +; +; NOAVX512MOVZXC-LABEL: test_mm_move_epi32: +; NOAVX512MOVZXC: # %bb.0: +; NOAVX512MOVZXC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9] +; NOAVX512MOVZXC-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01] +; NOAVX512MOVZXC-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3] +; NOAVX512MOVZXC-NEXT: retq # encoding: [0xc3] + %res = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4> + ret <4 x i32> %res +} + +define <8 x i16> @test_mm_move_epi16(<8 x i16> %a0) nounwind { +; AVX102-LABEL: 
test_mm_move_epi16: +; AVX102: # %bb.0: +; AVX102-NEXT: vmovw %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xc0] +; AVX102-NEXT: retq # encoding: [0xc3] +; +; NOAVX512MOVZXC-LABEL: test_mm_move_epi16: +; NOAVX512MOVZXC: # %bb.0: +; NOAVX512MOVZXC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9] +; NOAVX512MOVZXC-NEXT: vpblendw $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0e,0xc0,0x01] +; NOAVX512MOVZXC-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] +; NOAVX512MOVZXC-NEXT: retq # encoding: [0xc3] + %res = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> + ret <8 x i16> %res +} diff --git a/llvm/test/CodeGen/X86/comi-flags.ll b/llvm/test/CodeGen/X86/comi-flags.ll index 8b7a089f0ce8..6f520aa57dcd 100644 --- a/llvm/test/CodeGen/X86/comi-flags.ll +++ b/llvm/test/CodeGen/X86/comi-flags.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefix=AVX +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=AVX,NO-AVX10_2 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX,AVX10_2 ; ; SSE @@ -17,15 +18,22 @@ define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i3 ; SSE-NEXT: cmovnel %esi, %eax ; SSE-NEXT: retq ; -; AVX-LABEL: test_x86_sse_comieq_ss: -; AVX: # %bb.0: -; AVX-NEXT: movl %edi, %eax -; AVX-NEXT: vcomiss %xmm1, %xmm0 -; AVX-NEXT: setnp %cl -; AVX-NEXT: sete %dl -; AVX-NEXT: testb %cl, %dl -; AVX-NEXT: cmovnel %esi, %eax -; AVX-NEXT: retq +; NO-AVX10_2-LABEL: test_x86_sse_comieq_ss: +; NO-AVX10_2: # %bb.0: +; NO-AVX10_2-NEXT: movl %edi, %eax +; NO-AVX10_2-NEXT: vcomiss %xmm1, %xmm0 +; NO-AVX10_2-NEXT: setnp %cl +; NO-AVX10_2-NEXT: sete %dl +; NO-AVX10_2-NEXT: testb %cl, %dl +; 
NO-AVX10_2-NEXT: cmovnel %esi, %eax +; NO-AVX10_2-NEXT: retq +; +; AVX10_2-LABEL: test_x86_sse_comieq_ss: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: movl %edi, %eax +; AVX10_2-NEXT: vcomxss %xmm1, %xmm0 +; AVX10_2-NEXT: cmovel %esi, %eax +; AVX10_2-NEXT: retq %call = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) %cmp = icmp eq i32 %call, 0 %res = select i1 %cmp, i32 %a2, i32 %a3 @@ -126,13 +134,20 @@ define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i ; SSE-NEXT: cmovpl %edi, %eax ; SSE-NEXT: retq ; -; AVX-LABEL: test_x86_sse_comineq_ss: -; AVX: # %bb.0: -; AVX-NEXT: movl %esi, %eax -; AVX-NEXT: vcomiss %xmm1, %xmm0 -; AVX-NEXT: cmovnel %edi, %eax -; AVX-NEXT: cmovpl %edi, %eax -; AVX-NEXT: retq +; NO-AVX10_2-LABEL: test_x86_sse_comineq_ss: +; NO-AVX10_2: # %bb.0: +; NO-AVX10_2-NEXT: movl %esi, %eax +; NO-AVX10_2-NEXT: vcomiss %xmm1, %xmm0 +; NO-AVX10_2-NEXT: cmovnel %edi, %eax +; NO-AVX10_2-NEXT: cmovpl %edi, %eax +; NO-AVX10_2-NEXT: retq +; +; AVX10_2-LABEL: test_x86_sse_comineq_ss: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: movl %edi, %eax +; AVX10_2-NEXT: vcomxss %xmm1, %xmm0 +; AVX10_2-NEXT: cmovel %esi, %eax +; AVX10_2-NEXT: retq %call = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) %cmp = icmp ne i32 %call, 0 %res = select i1 %cmp, i32 %a2, i32 %a3 @@ -151,15 +166,22 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i ; SSE-NEXT: cmovnel %esi, %eax ; SSE-NEXT: retq ; -; AVX-LABEL: test_x86_sse_ucomieq_ss: -; AVX: # %bb.0: -; AVX-NEXT: movl %edi, %eax -; AVX-NEXT: vucomiss %xmm1, %xmm0 -; AVX-NEXT: setnp %cl -; AVX-NEXT: sete %dl -; AVX-NEXT: testb %cl, %dl -; AVX-NEXT: cmovnel %esi, %eax -; AVX-NEXT: retq +; NO-AVX10_2-LABEL: test_x86_sse_ucomieq_ss: +; NO-AVX10_2: # %bb.0: +; NO-AVX10_2-NEXT: movl %edi, %eax +; NO-AVX10_2-NEXT: vucomiss %xmm1, %xmm0 +; NO-AVX10_2-NEXT: setnp %cl +; NO-AVX10_2-NEXT: sete %dl +; NO-AVX10_2-NEXT: testb %cl, %dl +; NO-AVX10_2-NEXT: 
cmovnel %esi, %eax +; NO-AVX10_2-NEXT: retq +; +; AVX10_2-LABEL: test_x86_sse_ucomieq_ss: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: movl %edi, %eax +; AVX10_2-NEXT: vucomxss %xmm1, %xmm0 +; AVX10_2-NEXT: cmovel %esi, %eax +; AVX10_2-NEXT: retq %call = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) %cmp = icmp eq i32 %call, 0 %res = select i1 %cmp, i32 %a2, i32 %a3 @@ -260,13 +282,20 @@ define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, ; SSE-NEXT: cmovpl %edi, %eax ; SSE-NEXT: retq ; -; AVX-LABEL: test_x86_sse_ucomineq_ss: -; AVX: # %bb.0: -; AVX-NEXT: movl %esi, %eax -; AVX-NEXT: vucomiss %xmm1, %xmm0 -; AVX-NEXT: cmovnel %edi, %eax -; AVX-NEXT: cmovpl %edi, %eax -; AVX-NEXT: retq +; NO-AVX10_2-LABEL: test_x86_sse_ucomineq_ss: +; NO-AVX10_2: # %bb.0: +; NO-AVX10_2-NEXT: movl %esi, %eax +; NO-AVX10_2-NEXT: vucomiss %xmm1, %xmm0 +; NO-AVX10_2-NEXT: cmovnel %edi, %eax +; NO-AVX10_2-NEXT: cmovpl %edi, %eax +; NO-AVX10_2-NEXT: retq +; +; AVX10_2-LABEL: test_x86_sse_ucomineq_ss: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: movl %edi, %eax +; AVX10_2-NEXT: vucomxss %xmm1, %xmm0 +; AVX10_2-NEXT: cmovel %esi, %eax +; AVX10_2-NEXT: retq %call = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) %cmp = icmp ne i32 %call, 0 %res = select i1 %cmp, i32 %a2, i32 %a3 @@ -289,15 +318,22 @@ define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2, ; SSE-NEXT: cmovnel %esi, %eax ; SSE-NEXT: retq ; -; AVX-LABEL: test_x86_sse2_comieq_sd: -; AVX: # %bb.0: -; AVX-NEXT: movl %edi, %eax -; AVX-NEXT: vcomisd %xmm1, %xmm0 -; AVX-NEXT: setnp %cl -; AVX-NEXT: sete %dl -; AVX-NEXT: testb %cl, %dl -; AVX-NEXT: cmovnel %esi, %eax -; AVX-NEXT: retq +; NO-AVX10_2-LABEL: test_x86_sse2_comieq_sd: +; NO-AVX10_2: # %bb.0: +; NO-AVX10_2-NEXT: movl %edi, %eax +; NO-AVX10_2-NEXT: vcomisd %xmm1, %xmm0 +; NO-AVX10_2-NEXT: setnp %cl +; NO-AVX10_2-NEXT: sete %dl +; NO-AVX10_2-NEXT: testb %cl, %dl +; NO-AVX10_2-NEXT: cmovnel %esi, 
%eax +; NO-AVX10_2-NEXT: retq +; +; AVX10_2-LABEL: test_x86_sse2_comieq_sd: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: movl %edi, %eax +; AVX10_2-NEXT: vcomxsd %xmm1, %xmm0 +; AVX10_2-NEXT: cmovel %esi, %eax +; AVX10_2-NEXT: retq %call = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] %cmp = icmp eq i32 %call, 0 %res = select i1 %cmp, i32 %a2, i32 %a3 @@ -398,13 +434,20 @@ define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2 ; SSE-NEXT: cmovpl %edi, %eax ; SSE-NEXT: retq ; -; AVX-LABEL: test_x86_sse2_comineq_sd: -; AVX: # %bb.0: -; AVX-NEXT: movl %esi, %eax -; AVX-NEXT: vcomisd %xmm1, %xmm0 -; AVX-NEXT: cmovnel %edi, %eax -; AVX-NEXT: cmovpl %edi, %eax -; AVX-NEXT: retq +; NO-AVX10_2-LABEL: test_x86_sse2_comineq_sd: +; NO-AVX10_2: # %bb.0: +; NO-AVX10_2-NEXT: movl %esi, %eax +; NO-AVX10_2-NEXT: vcomisd %xmm1, %xmm0 +; NO-AVX10_2-NEXT: cmovnel %edi, %eax +; NO-AVX10_2-NEXT: cmovpl %edi, %eax +; NO-AVX10_2-NEXT: retq +; +; AVX10_2-LABEL: test_x86_sse2_comineq_sd: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: movl %edi, %eax +; AVX10_2-NEXT: vcomxsd %xmm1, %xmm0 +; AVX10_2-NEXT: cmovel %esi, %eax +; AVX10_2-NEXT: retq %call = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] %cmp = icmp ne i32 %call, 0 %res = select i1 %cmp, i32 %a2, i32 %a3 @@ -423,15 +466,22 @@ define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2 ; SSE-NEXT: cmovnel %esi, %eax ; SSE-NEXT: retq ; -; AVX-LABEL: test_x86_sse2_ucomieq_sd: -; AVX: # %bb.0: -; AVX-NEXT: movl %edi, %eax -; AVX-NEXT: vucomisd %xmm1, %xmm0 -; AVX-NEXT: setnp %cl -; AVX-NEXT: sete %dl -; AVX-NEXT: testb %cl, %dl -; AVX-NEXT: cmovnel %esi, %eax -; AVX-NEXT: retq +; NO-AVX10_2-LABEL: test_x86_sse2_ucomieq_sd: +; NO-AVX10_2: # %bb.0: +; NO-AVX10_2-NEXT: movl %edi, %eax +; NO-AVX10_2-NEXT: vucomisd %xmm1, %xmm0 +; NO-AVX10_2-NEXT: setnp %cl +; NO-AVX10_2-NEXT: sete %dl +; NO-AVX10_2-NEXT: testb %cl, %dl +; 
NO-AVX10_2-NEXT: cmovnel %esi, %eax +; NO-AVX10_2-NEXT: retq +; +; AVX10_2-LABEL: test_x86_sse2_ucomieq_sd: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: movl %edi, %eax +; AVX10_2-NEXT: vucomxsd %xmm1, %xmm0 +; AVX10_2-NEXT: cmovel %esi, %eax +; AVX10_2-NEXT: retq %call = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] %cmp = icmp eq i32 %call, 0 %res = select i1 %cmp, i32 %a2, i32 %a3 @@ -532,13 +582,20 @@ define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a ; SSE-NEXT: cmovpl %edi, %eax ; SSE-NEXT: retq ; -; AVX-LABEL: test_x86_sse2_ucomineq_sd: -; AVX: # %bb.0: -; AVX-NEXT: movl %esi, %eax -; AVX-NEXT: vucomisd %xmm1, %xmm0 -; AVX-NEXT: cmovnel %edi, %eax -; AVX-NEXT: cmovpl %edi, %eax -; AVX-NEXT: retq +; NO-AVX10_2-LABEL: test_x86_sse2_ucomineq_sd: +; NO-AVX10_2: # %bb.0: +; NO-AVX10_2-NEXT: movl %esi, %eax +; NO-AVX10_2-NEXT: vucomisd %xmm1, %xmm0 +; NO-AVX10_2-NEXT: cmovnel %edi, %eax +; NO-AVX10_2-NEXT: cmovpl %edi, %eax +; NO-AVX10_2-NEXT: retq +; +; AVX10_2-LABEL: test_x86_sse2_ucomineq_sd: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: movl %edi, %eax +; AVX10_2-NEXT: vucomxsd %xmm1, %xmm0 +; AVX10_2-NEXT: cmovel %esi, %eax +; AVX10_2-NEXT: retq %call = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] %cmp = icmp ne i32 %call, 0 %res = select i1 %cmp, i32 %a2, i32 %a3 @@ -557,15 +614,22 @@ define void @PR38960_eq(<4 x float> %A, <4 x float> %B) { ; SSE-NEXT: # %bb.1: # %if.end ; SSE-NEXT: retq ; -; AVX-LABEL: PR38960_eq: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vcomiss %xmm1, %xmm0 -; AVX-NEXT: setnp %al -; AVX-NEXT: sete %cl -; AVX-NEXT: testb %al, %cl -; AVX-NEXT: jne foo@PLT # TAILCALL -; AVX-NEXT: # %bb.1: # %if.end -; AVX-NEXT: retq +; NO-AVX10_2-LABEL: PR38960_eq: +; NO-AVX10_2: # %bb.0: # %entry +; NO-AVX10_2-NEXT: vcomiss %xmm1, %xmm0 +; NO-AVX10_2-NEXT: setnp %al +; NO-AVX10_2-NEXT: sete %cl +; NO-AVX10_2-NEXT: testb %al, %cl +; NO-AVX10_2-NEXT: jne 
foo@PLT # TAILCALL +; NO-AVX10_2-NEXT: # %bb.1: # %if.end +; NO-AVX10_2-NEXT: retq +; +; AVX10_2-LABEL: PR38960_eq: +; AVX10_2: # %bb.0: # %entry +; AVX10_2-NEXT: vcomxss %xmm1, %xmm0 +; AVX10_2-NEXT: je foo@PLT # TAILCALL +; AVX10_2-NEXT: # %bb.1: # %if.end +; AVX10_2-NEXT: retq entry: %call = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %A, <4 x float> %B) #3 %cmp = icmp eq i32 %call, 0 @@ -590,15 +654,22 @@ define void @PR38960_neq(<4 x float> %A, <4 x float> %B) { ; SSE-NEXT: # %bb.1: # %if.end ; SSE-NEXT: retq ; -; AVX-LABEL: PR38960_neq: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vcomiss %xmm1, %xmm0 -; AVX-NEXT: setp %al -; AVX-NEXT: setne %cl -; AVX-NEXT: orb %al, %cl -; AVX-NEXT: jne foo@PLT # TAILCALL -; AVX-NEXT: # %bb.1: # %if.end -; AVX-NEXT: retq +; NO-AVX10_2-LABEL: PR38960_neq: +; NO-AVX10_2: # %bb.0: # %entry +; NO-AVX10_2-NEXT: vcomiss %xmm1, %xmm0 +; NO-AVX10_2-NEXT: setp %al +; NO-AVX10_2-NEXT: setne %cl +; NO-AVX10_2-NEXT: orb %al, %cl +; NO-AVX10_2-NEXT: jne foo@PLT # TAILCALL +; NO-AVX10_2-NEXT: # %bb.1: # %if.end +; NO-AVX10_2-NEXT: retq +; +; AVX10_2-LABEL: PR38960_neq: +; AVX10_2: # %bb.0: # %entry +; AVX10_2-NEXT: vcomxss %xmm1, %xmm0 +; AVX10_2-NEXT: jne foo@PLT # TAILCALL +; AVX10_2-NEXT: # %bb.1: # %if.end +; AVX10_2-NEXT: retq entry: %call = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %A, <4 x float> %B) #3 %cmp = icmp eq i32 %call, 0 diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-7.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-7.ll index ed316990e486..f616eafc2427 100644 --- a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-7.ll +++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-7.ll @@ -240,21 +240,17 @@ define void @load_i32_stride7_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr ; AVX512-FCP-NEXT: vmovdqa 32(%rdi), %ymm1 ; AVX512-FCP-NEXT: vmovdqa (%rdi), %ymm6 ; AVX512-FCP-NEXT: vpermi2d %ymm1, %ymm6, %ymm0 -; AVX512-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm7 = 
[13,4,6,7,13,4,6,7] -; AVX512-FCP-NEXT: # ymm7 = mem[0,1,0,1] +; AVX512-FCP-NEXT: vpmovsxbd {{.*#+}} xmm7 = [13,4,6,7] ; AVX512-FCP-NEXT: vpermi2d %ymm6, %ymm1, %ymm7 -; AVX512-FCP-NEXT: vextracti128 $1, %ymm7, %xmm7 -; AVX512-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm8 = [6,13,6,7,6,13,6,7] -; AVX512-FCP-NEXT: # ymm8 = mem[0,1,0,1] +; AVX512-FCP-NEXT: vpmovsxbd {{.*#+}} xmm8 = [6,13,6,7] ; AVX512-FCP-NEXT: vpermi2d %ymm1, %ymm6, %ymm8 -; AVX512-FCP-NEXT: vextracti128 $1, %ymm8, %xmm1 ; AVX512-FCP-NEXT: vmovq %xmm2, (%rsi) ; AVX512-FCP-NEXT: vmovq %xmm3, (%rdx) ; AVX512-FCP-NEXT: vmovq %xmm4, (%rcx) ; AVX512-FCP-NEXT: vmovq %xmm5, (%r8) ; AVX512-FCP-NEXT: vmovq %xmm0, (%r9) ; AVX512-FCP-NEXT: vmovq %xmm7, (%r10) -; AVX512-FCP-NEXT: vmovq %xmm1, (%rax) +; AVX512-FCP-NEXT: vmovq %xmm8, (%rax) ; AVX512-FCP-NEXT: vzeroupper ; AVX512-FCP-NEXT: retq ; @@ -309,21 +305,17 @@ define void @load_i32_stride7_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr ; AVX512DQ-FCP-NEXT: vmovdqa 32(%rdi), %ymm1 ; AVX512DQ-FCP-NEXT: vmovdqa (%rdi), %ymm6 ; AVX512DQ-FCP-NEXT: vpermi2d %ymm1, %ymm6, %ymm0 -; AVX512DQ-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm7 = [13,4,6,7,13,4,6,7] -; AVX512DQ-FCP-NEXT: # ymm7 = mem[0,1,0,1] +; AVX512DQ-FCP-NEXT: vpmovsxbd {{.*#+}} xmm7 = [13,4,6,7] ; AVX512DQ-FCP-NEXT: vpermi2d %ymm6, %ymm1, %ymm7 -; AVX512DQ-FCP-NEXT: vextracti128 $1, %ymm7, %xmm7 -; AVX512DQ-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm8 = [6,13,6,7,6,13,6,7] -; AVX512DQ-FCP-NEXT: # ymm8 = mem[0,1,0,1] +; AVX512DQ-FCP-NEXT: vpmovsxbd {{.*#+}} xmm8 = [6,13,6,7] ; AVX512DQ-FCP-NEXT: vpermi2d %ymm1, %ymm6, %ymm8 -; AVX512DQ-FCP-NEXT: vextracti128 $1, %ymm8, %xmm1 ; AVX512DQ-FCP-NEXT: vmovq %xmm2, (%rsi) ; AVX512DQ-FCP-NEXT: vmovq %xmm3, (%rdx) ; AVX512DQ-FCP-NEXT: vmovq %xmm4, (%rcx) ; AVX512DQ-FCP-NEXT: vmovq %xmm5, (%r8) ; AVX512DQ-FCP-NEXT: vmovq %xmm0, (%r9) ; AVX512DQ-FCP-NEXT: vmovq %xmm7, (%r10) -; AVX512DQ-FCP-NEXT: vmovq %xmm1, (%rax) +; AVX512DQ-FCP-NEXT: vmovq %xmm8, (%rax) ; AVX512DQ-FCP-NEXT: 
vzeroupper ; AVX512DQ-FCP-NEXT: retq ; @@ -378,21 +370,17 @@ define void @load_i32_stride7_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr ; AVX512BW-FCP-NEXT: vmovdqa 32(%rdi), %ymm1 ; AVX512BW-FCP-NEXT: vmovdqa (%rdi), %ymm6 ; AVX512BW-FCP-NEXT: vpermi2d %ymm1, %ymm6, %ymm0 -; AVX512BW-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm7 = [13,4,6,7,13,4,6,7] -; AVX512BW-FCP-NEXT: # ymm7 = mem[0,1,0,1] +; AVX512BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm7 = [13,4,6,7] ; AVX512BW-FCP-NEXT: vpermi2d %ymm6, %ymm1, %ymm7 -; AVX512BW-FCP-NEXT: vextracti128 $1, %ymm7, %xmm7 -; AVX512BW-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm8 = [6,13,6,7,6,13,6,7] -; AVX512BW-FCP-NEXT: # ymm8 = mem[0,1,0,1] +; AVX512BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm8 = [6,13,6,7] ; AVX512BW-FCP-NEXT: vpermi2d %ymm1, %ymm6, %ymm8 -; AVX512BW-FCP-NEXT: vextracti128 $1, %ymm8, %xmm1 ; AVX512BW-FCP-NEXT: vmovq %xmm2, (%rsi) ; AVX512BW-FCP-NEXT: vmovq %xmm3, (%rdx) ; AVX512BW-FCP-NEXT: vmovq %xmm4, (%rcx) ; AVX512BW-FCP-NEXT: vmovq %xmm5, (%r8) ; AVX512BW-FCP-NEXT: vmovq %xmm0, (%r9) ; AVX512BW-FCP-NEXT: vmovq %xmm7, (%r10) -; AVX512BW-FCP-NEXT: vmovq %xmm1, (%rax) +; AVX512BW-FCP-NEXT: vmovq %xmm8, (%rax) ; AVX512BW-FCP-NEXT: vzeroupper ; AVX512BW-FCP-NEXT: retq ; @@ -447,21 +435,17 @@ define void @load_i32_stride7_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr ; AVX512DQ-BW-FCP-NEXT: vmovdqa 32(%rdi), %ymm1 ; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rdi), %ymm6 ; AVX512DQ-BW-FCP-NEXT: vpermi2d %ymm1, %ymm6, %ymm0 -; AVX512DQ-BW-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm7 = [13,4,6,7,13,4,6,7] -; AVX512DQ-BW-FCP-NEXT: # ymm7 = mem[0,1,0,1] +; AVX512DQ-BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm7 = [13,4,6,7] ; AVX512DQ-BW-FCP-NEXT: vpermi2d %ymm6, %ymm1, %ymm7 -; AVX512DQ-BW-FCP-NEXT: vextracti128 $1, %ymm7, %xmm7 -; AVX512DQ-BW-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm8 = [6,13,6,7,6,13,6,7] -; AVX512DQ-BW-FCP-NEXT: # ymm8 = mem[0,1,0,1] +; AVX512DQ-BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm8 = [6,13,6,7] ; AVX512DQ-BW-FCP-NEXT: vpermi2d %ymm1, %ymm6, 
%ymm8 -; AVX512DQ-BW-FCP-NEXT: vextracti128 $1, %ymm8, %xmm1 ; AVX512DQ-BW-FCP-NEXT: vmovq %xmm2, (%rsi) ; AVX512DQ-BW-FCP-NEXT: vmovq %xmm3, (%rdx) ; AVX512DQ-BW-FCP-NEXT: vmovq %xmm4, (%rcx) ; AVX512DQ-BW-FCP-NEXT: vmovq %xmm5, (%r8) ; AVX512DQ-BW-FCP-NEXT: vmovq %xmm0, (%r9) ; AVX512DQ-BW-FCP-NEXT: vmovq %xmm7, (%r10) -; AVX512DQ-BW-FCP-NEXT: vmovq %xmm1, (%rax) +; AVX512DQ-BW-FCP-NEXT: vmovq %xmm8, (%rax) ; AVX512DQ-BW-FCP-NEXT: vzeroupper ; AVX512DQ-BW-FCP-NEXT: retq %wide.vec = load <14 x i32>, ptr %in.vec, align 64 diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-8.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-8.ll index 2fd173c72917..872a8d00cc23 100644 --- a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-8.ll +++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-8.ll @@ -226,10 +226,8 @@ define void @load_i32_stride8_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr ; AVX512-FCP-NEXT: vmovdqa (%rdi), %ymm4 ; AVX512-FCP-NEXT: vpunpckldq {{.*#+}} ymm5 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[4],ymm1[4],ymm4[5],ymm1[5] ; AVX512-FCP-NEXT: vextracti128 $1, %ymm5, %xmm5 -; AVX512-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [5,13,5,5,5,13,5,5] -; AVX512-FCP-NEXT: # ymm6 = mem[0,1,0,1] +; AVX512-FCP-NEXT: vpmovsxbd {{.*#+}} xmm6 = [5,13,5,5] ; AVX512-FCP-NEXT: vpermi2d %ymm1, %ymm4, %ymm6 -; AVX512-FCP-NEXT: vextracti128 $1, %ymm6, %xmm6 ; AVX512-FCP-NEXT: vpunpckhdq {{.*#+}} ymm1 = ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[6],ymm1[6],ymm4[7],ymm1[7] ; AVX512-FCP-NEXT: vextracti128 $1, %ymm1, %xmm4 ; AVX512-FCP-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7] @@ -293,10 +291,8 @@ define void @load_i32_stride8_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr ; AVX512DQ-FCP-NEXT: vmovdqa (%rdi), %ymm4 ; AVX512DQ-FCP-NEXT: vpunpckldq {{.*#+}} ymm5 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[4],ymm1[4],ymm4[5],ymm1[5] ; AVX512DQ-FCP-NEXT: vextracti128 $1, %ymm5, %xmm5 -; AVX512DQ-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm6 = 
[5,13,5,5,5,13,5,5] -; AVX512DQ-FCP-NEXT: # ymm6 = mem[0,1,0,1] +; AVX512DQ-FCP-NEXT: vpmovsxbd {{.*#+}} xmm6 = [5,13,5,5] ; AVX512DQ-FCP-NEXT: vpermi2d %ymm1, %ymm4, %ymm6 -; AVX512DQ-FCP-NEXT: vextracti128 $1, %ymm6, %xmm6 ; AVX512DQ-FCP-NEXT: vpunpckhdq {{.*#+}} ymm1 = ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[6],ymm1[6],ymm4[7],ymm1[7] ; AVX512DQ-FCP-NEXT: vextracti128 $1, %ymm1, %xmm4 ; AVX512DQ-FCP-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7] @@ -360,10 +356,8 @@ define void @load_i32_stride8_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr ; AVX512BW-FCP-NEXT: vmovdqa (%rdi), %ymm4 ; AVX512BW-FCP-NEXT: vpunpckldq {{.*#+}} ymm5 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[4],ymm1[4],ymm4[5],ymm1[5] ; AVX512BW-FCP-NEXT: vextracti128 $1, %ymm5, %xmm5 -; AVX512BW-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [5,13,5,5,5,13,5,5] -; AVX512BW-FCP-NEXT: # ymm6 = mem[0,1,0,1] +; AVX512BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm6 = [5,13,5,5] ; AVX512BW-FCP-NEXT: vpermi2d %ymm1, %ymm4, %ymm6 -; AVX512BW-FCP-NEXT: vextracti128 $1, %ymm6, %xmm6 ; AVX512BW-FCP-NEXT: vpunpckhdq {{.*#+}} ymm1 = ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[6],ymm1[6],ymm4[7],ymm1[7] ; AVX512BW-FCP-NEXT: vextracti128 $1, %ymm1, %xmm4 ; AVX512BW-FCP-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7] @@ -427,10 +421,8 @@ define void @load_i32_stride8_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr ; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rdi), %ymm4 ; AVX512DQ-BW-FCP-NEXT: vpunpckldq {{.*#+}} ymm5 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[4],ymm1[4],ymm4[5],ymm1[5] ; AVX512DQ-BW-FCP-NEXT: vextracti128 $1, %ymm5, %xmm5 -; AVX512DQ-BW-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [5,13,5,5,5,13,5,5] -; AVX512DQ-BW-FCP-NEXT: # ymm6 = mem[0,1,0,1] +; AVX512DQ-BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm6 = [5,13,5,5] ; AVX512DQ-BW-FCP-NEXT: vpermi2d %ymm1, %ymm4, %ymm6 -; AVX512DQ-BW-FCP-NEXT: vextracti128 $1, %ymm6, %xmm6 ; AVX512DQ-BW-FCP-NEXT: vpunpckhdq {{.*#+}} ymm1 = ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[6],ymm1[6],ymm4[7],ymm1[7] ; 
AVX512DQ-BW-FCP-NEXT: vextracti128 $1, %ymm1, %xmm4 ; AVX512DQ-BW-FCP-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7] diff --git a/llvm/test/Demangle/ms-placeholder-return-type.test b/llvm/test/Demangle/ms-placeholder-return-type.test new file mode 100644 index 000000000000..18038e636c8d --- /dev/null +++ b/llvm/test/Demangle/ms-placeholder-return-type.test @@ -0,0 +1,18 @@ +; RUN: llvm-undname < %s | FileCheck %s
+
+; CHECK-NOT: Invalid mangled name
+
+?TestNonTemplateAuto@@YA@XZ
+; CHECK: __cdecl TestNonTemplateAuto(void)
+
+??$AutoT@X@@YA?A_PXZ
+; CHECK: auto __cdecl AutoT<void>(void)
+
+??$AutoT@X@@YA?B_PXZ
+; CHECK: auto const __cdecl AutoT<void>(void)
+
+??$AutoT@X@@YA?A_TXZ
+; CHECK: decltype(auto) __cdecl AutoT<void>(void)
+
+??$AutoT@X@@YA?B_TXZ
+; CHECK: decltype(auto) const __cdecl AutoT<void>(void)
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-com-ef-32.txt b/llvm/test/MC/Disassembler/X86/avx10.2-com-ef-32.txt new file mode 100644 index 000000000000..e7adacbbf88c --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/avx10.2-com-ef-32.txt @@ -0,0 +1,195 @@ +# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=i386 --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: vcomxsd %xmm3, %xmm2 +# INTEL: vcomxsd xmm2, xmm3 +0x62,0xf1,0xfe,0x08,0x2f,0xd3 + +# ATT: vcomxsd {sae}, %xmm3, %xmm2 +# INTEL: vcomxsd xmm2, xmm3, {sae} +0x62,0xf1,0xfe,0x18,0x2f,0xd3 + +# ATT: vcomxsd 268435456(%esp,%esi,8), %xmm2 +# INTEL: vcomxsd xmm2, qword ptr [esp + 8*esi + 268435456] +0x62,0xf1,0xfe,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vcomxsd 291(%edi,%eax,4), %xmm2 +# INTEL: vcomxsd xmm2, qword ptr [edi + 4*eax + 291] +0x62,0xf1,0xfe,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vcomxsd (%eax), %xmm2 +# INTEL: vcomxsd xmm2, qword ptr [eax] +0x62,0xf1,0xfe,0x08,0x2f,0x10 + +# ATT: vcomxsd -256(,%ebp,2), %xmm2 +# INTEL: vcomxsd xmm2, qword ptr [2*ebp - 256] +0x62,0xf1,0xfe,0x08,0x2f,0x14,0x6d,0x00,0xff,0xff,0xff + +# ATT: vcomxsd 1016(%ecx), %xmm2 +# INTEL: vcomxsd xmm2, qword ptr [ecx + 1016] +0x62,0xf1,0xfe,0x08,0x2f,0x51,0x7f + +# ATT: vcomxsd -1024(%edx), %xmm2 +# INTEL: vcomxsd xmm2, qword ptr [edx - 1024] +0x62,0xf1,0xfe,0x08,0x2f,0x52,0x80 + +# ATT: vcomxsh %xmm3, %xmm2 +# INTEL: vcomxsh xmm2, xmm3 +0x62,0xf5,0x7f,0x08,0x2f,0xd3 + +# ATT: vcomxsh {sae}, %xmm3, %xmm2 +# INTEL: vcomxsh xmm2, xmm3, {sae} +0x62,0xf5,0x7f,0x18,0x2f,0xd3 + +# ATT: vcomxsh 268435456(%esp,%esi,8), %xmm2 +# INTEL: vcomxsh xmm2, word ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x7f,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vcomxsh 291(%edi,%eax,4), %xmm2 +# INTEL: vcomxsh xmm2, word ptr [edi + 4*eax + 291] +0x62,0xf5,0x7f,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vcomxsh (%eax), %xmm2 +# 
INTEL: vcomxsh xmm2, word ptr [eax] +0x62,0xf5,0x7f,0x08,0x2f,0x10 + +# ATT: vcomxsh -64(,%ebp,2), %xmm2 +# INTEL: vcomxsh xmm2, word ptr [2*ebp - 64] +0x62,0xf5,0x7f,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vcomxsh 254(%ecx), %xmm2 +# INTEL: vcomxsh xmm2, word ptr [ecx + 254] +0x62,0xf5,0x7f,0x08,0x2f,0x51,0x7f + +# ATT: vcomxsh -256(%edx), %xmm2 +# INTEL: vcomxsh xmm2, word ptr [edx - 256] +0x62,0xf5,0x7f,0x08,0x2f,0x52,0x80 + +# ATT: vcomxss %xmm3, %xmm2 +# INTEL: vcomxss xmm2, xmm3 +0x62,0xf1,0x7f,0x08,0x2f,0xd3 + +# ATT: vcomxss {sae}, %xmm3, %xmm2 +# INTEL: vcomxss xmm2, xmm3, {sae} +0x62,0xf1,0x7f,0x18,0x2f,0xd3 + +# ATT: vcomxss 268435456(%esp,%esi,8), %xmm2 +# INTEL: vcomxss xmm2, dword ptr [esp + 8*esi + 268435456] +0x62,0xf1,0x7f,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vcomxss 291(%edi,%eax,4), %xmm2 +# INTEL: vcomxss xmm2, dword ptr [edi + 4*eax + 291] +0x62,0xf1,0x7f,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vcomxss (%eax), %xmm2 +# INTEL: vcomxss xmm2, dword ptr [eax] +0x62,0xf1,0x7f,0x08,0x2f,0x10 + +# ATT: vcomxss -128(,%ebp,2), %xmm2 +# INTEL: vcomxss xmm2, dword ptr [2*ebp - 128] +0x62,0xf1,0x7f,0x08,0x2f,0x14,0x6d,0x80,0xff,0xff,0xff + +# ATT: vcomxss 508(%ecx), %xmm2 +# INTEL: vcomxss xmm2, dword ptr [ecx + 508] +0x62,0xf1,0x7f,0x08,0x2f,0x51,0x7f + +# ATT: vcomxss -512(%edx), %xmm2 +# INTEL: vcomxss xmm2, dword ptr [edx - 512] +0x62,0xf1,0x7f,0x08,0x2f,0x52,0x80 + +# ATT: vucomxsd %xmm3, %xmm2 +# INTEL: vucomxsd xmm2, xmm3 +0x62,0xf1,0xfe,0x08,0x2e,0xd3 + +# ATT: vucomxsd {sae}, %xmm3, %xmm2 +# INTEL: vucomxsd xmm2, xmm3, {sae} +0x62,0xf1,0xfe,0x18,0x2e,0xd3 + +# ATT: vucomxsd 268435456(%esp,%esi,8), %xmm2 +# INTEL: vucomxsd xmm2, qword ptr [esp + 8*esi + 268435456] +0x62,0xf1,0xfe,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vucomxsd 291(%edi,%eax,4), %xmm2 +# INTEL: vucomxsd xmm2, qword ptr [edi + 4*eax + 291] +0x62,0xf1,0xfe,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vucomxsd (%eax), %xmm2 +# INTEL: 
vucomxsd xmm2, qword ptr [eax] +0x62,0xf1,0xfe,0x08,0x2e,0x10 + +# ATT: vucomxsd -256(,%ebp,2), %xmm2 +# INTEL: vucomxsd xmm2, qword ptr [2*ebp - 256] +0x62,0xf1,0xfe,0x08,0x2e,0x14,0x6d,0x00,0xff,0xff,0xff + +# ATT: vucomxsd 1016(%ecx), %xmm2 +# INTEL: vucomxsd xmm2, qword ptr [ecx + 1016] +0x62,0xf1,0xfe,0x08,0x2e,0x51,0x7f + +# ATT: vucomxsd -1024(%edx), %xmm2 +# INTEL: vucomxsd xmm2, qword ptr [edx - 1024] +0x62,0xf1,0xfe,0x08,0x2e,0x52,0x80 + +# ATT: vucomxsh %xmm3, %xmm2 +# INTEL: vucomxsh xmm2, xmm3 +0x62,0xf5,0x7f,0x08,0x2e,0xd3 + +# ATT: vucomxsh {sae}, %xmm3, %xmm2 +# INTEL: vucomxsh xmm2, xmm3, {sae} +0x62,0xf5,0x7f,0x18,0x2e,0xd3 + +# ATT: vucomxsh 268435456(%esp,%esi,8), %xmm2 +# INTEL: vucomxsh xmm2, word ptr [esp + 8*esi + 268435456] +0x62,0xf5,0x7f,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vucomxsh 291(%edi,%eax,4), %xmm2 +# INTEL: vucomxsh xmm2, word ptr [edi + 4*eax + 291] +0x62,0xf5,0x7f,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vucomxsh (%eax), %xmm2 +# INTEL: vucomxsh xmm2, word ptr [eax] +0x62,0xf5,0x7f,0x08,0x2e,0x10 + +# ATT: vucomxsh -64(,%ebp,2), %xmm2 +# INTEL: vucomxsh xmm2, word ptr [2*ebp - 64] +0x62,0xf5,0x7f,0x08,0x2e,0x14,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vucomxsh 254(%ecx), %xmm2 +# INTEL: vucomxsh xmm2, word ptr [ecx + 254] +0x62,0xf5,0x7f,0x08,0x2e,0x51,0x7f + +# ATT: vucomxsh -256(%edx), %xmm2 +# INTEL: vucomxsh xmm2, word ptr [edx - 256] +0x62,0xf5,0x7f,0x08,0x2e,0x52,0x80 + +# ATT: vucomxss %xmm3, %xmm2 +# INTEL: vucomxss xmm2, xmm3 +0x62,0xf1,0x7f,0x08,0x2e,0xd3 + +# ATT: vucomxss {sae}, %xmm3, %xmm2 +# INTEL: vucomxss xmm2, xmm3, {sae} +0x62,0xf1,0x7f,0x18,0x2e,0xd3 + +# ATT: vucomxss 268435456(%esp,%esi,8), %xmm2 +# INTEL: vucomxss xmm2, dword ptr [esp + 8*esi + 268435456] +0x62,0xf1,0x7f,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vucomxss 291(%edi,%eax,4), %xmm2 +# INTEL: vucomxss xmm2, dword ptr [edi + 4*eax + 291] +0x62,0xf1,0x7f,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vucomxss (%eax), 
%xmm2 +# INTEL: vucomxss xmm2, dword ptr [eax] +0x62,0xf1,0x7f,0x08,0x2e,0x10 + +# ATT: vucomxss -128(,%ebp,2), %xmm2 +# INTEL: vucomxss xmm2, dword ptr [2*ebp - 128] +0x62,0xf1,0x7f,0x08,0x2e,0x14,0x6d,0x80,0xff,0xff,0xff + +# ATT: vucomxss 508(%ecx), %xmm2 +# INTEL: vucomxss xmm2, dword ptr [ecx + 508] +0x62,0xf1,0x7f,0x08,0x2e,0x51,0x7f + +# ATT: vucomxss -512(%edx), %xmm2 +# INTEL: vucomxss xmm2, dword ptr [edx - 512] +0x62,0xf1,0x7f,0x08,0x2e,0x52,0x80 + diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-com-ef-64.txt b/llvm/test/MC/Disassembler/X86/avx10.2-com-ef-64.txt new file mode 100644 index 000000000000..ea580fe8d508 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/avx10.2-com-ef-64.txt @@ -0,0 +1,195 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: vcomxsd %xmm23, %xmm22 +# INTEL: vcomxsd xmm22, xmm23 +0x62,0xa1,0xfe,0x08,0x2f,0xf7 + +# ATT: vcomxsd {sae}, %xmm23, %xmm22 +# INTEL: vcomxsd xmm22, xmm23, {sae} +0x62,0xa1,0xfe,0x18,0x2f,0xf7 + +# ATT: vcomxsd 268435456(%rbp,%r14,8), %xmm22 +# INTEL: vcomxsd xmm22, qword ptr [rbp + 8*r14 + 268435456] +0x62,0xa1,0xfe,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vcomxsd 291(%r8,%rax,4), %xmm22 +# INTEL: vcomxsd xmm22, qword ptr [r8 + 4*rax + 291] +0x62,0xc1,0xfe,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vcomxsd (%rip), %xmm22 +# INTEL: vcomxsd xmm22, qword ptr [rip] +0x62,0xe1,0xfe,0x08,0x2f,0x35,0x00,0x00,0x00,0x00 + +# ATT: vcomxsd -256(,%rbp,2), %xmm22 +# INTEL: vcomxsd xmm22, qword ptr [2*rbp - 256] +0x62,0xe1,0xfe,0x08,0x2f,0x34,0x6d,0x00,0xff,0xff,0xff + +# ATT: vcomxsd 1016(%rcx), %xmm22 +# INTEL: vcomxsd xmm22, qword ptr [rcx + 1016] +0x62,0xe1,0xfe,0x08,0x2f,0x71,0x7f + +# ATT: vcomxsd -1024(%rdx), %xmm22 +# INTEL: vcomxsd xmm22, qword ptr [rdx - 1024] +0x62,0xe1,0xfe,0x08,0x2f,0x72,0x80 + +# ATT: vcomxsh %xmm23, %xmm22 +# INTEL: 
vcomxsh xmm22, xmm23 +0x62,0xa5,0x7f,0x08,0x2f,0xf7 + +# ATT: vcomxsh {sae}, %xmm23, %xmm22 +# INTEL: vcomxsh xmm22, xmm23, {sae} +0x62,0xa5,0x7f,0x18,0x2f,0xf7 + +# ATT: vcomxsh 268435456(%rbp,%r14,8), %xmm22 +# INTEL: vcomxsh xmm22, word ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x7f,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vcomxsh 291(%r8,%rax,4), %xmm22 +# INTEL: vcomxsh xmm22, word ptr [r8 + 4*rax + 291] +0x62,0xc5,0x7f,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vcomxsh (%rip), %xmm22 +# INTEL: vcomxsh xmm22, word ptr [rip] +0x62,0xe5,0x7f,0x08,0x2f,0x35,0x00,0x00,0x00,0x00 + +# ATT: vcomxsh -64(,%rbp,2), %xmm22 +# INTEL: vcomxsh xmm22, word ptr [2*rbp - 64] +0x62,0xe5,0x7f,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vcomxsh 254(%rcx), %xmm22 +# INTEL: vcomxsh xmm22, word ptr [rcx + 254] +0x62,0xe5,0x7f,0x08,0x2f,0x71,0x7f + +# ATT: vcomxsh -256(%rdx), %xmm22 +# INTEL: vcomxsh xmm22, word ptr [rdx - 256] +0x62,0xe5,0x7f,0x08,0x2f,0x72,0x80 + +# ATT: vcomxss %xmm23, %xmm22 +# INTEL: vcomxss xmm22, xmm23 +0x62,0xa1,0x7f,0x08,0x2f,0xf7 + +# ATT: vcomxss {sae}, %xmm23, %xmm22 +# INTEL: vcomxss xmm22, xmm23, {sae} +0x62,0xa1,0x7f,0x18,0x2f,0xf7 + +# ATT: vcomxss 268435456(%rbp,%r14,8), %xmm22 +# INTEL: vcomxss xmm22, dword ptr [rbp + 8*r14 + 268435456] +0x62,0xa1,0x7f,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vcomxss 291(%r8,%rax,4), %xmm22 +# INTEL: vcomxss xmm22, dword ptr [r8 + 4*rax + 291] +0x62,0xc1,0x7f,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vcomxss (%rip), %xmm22 +# INTEL: vcomxss xmm22, dword ptr [rip] +0x62,0xe1,0x7f,0x08,0x2f,0x35,0x00,0x00,0x00,0x00 + +# ATT: vcomxss -128(,%rbp,2), %xmm22 +# INTEL: vcomxss xmm22, dword ptr [2*rbp - 128] +0x62,0xe1,0x7f,0x08,0x2f,0x34,0x6d,0x80,0xff,0xff,0xff + +# ATT: vcomxss 508(%rcx), %xmm22 +# INTEL: vcomxss xmm22, dword ptr [rcx + 508] +0x62,0xe1,0x7f,0x08,0x2f,0x71,0x7f + +# ATT: vcomxss -512(%rdx), %xmm22 +# INTEL: vcomxss xmm22, dword ptr [rdx - 512] 
+0x62,0xe1,0x7f,0x08,0x2f,0x72,0x80 + +# ATT: vucomxsd %xmm23, %xmm22 +# INTEL: vucomxsd xmm22, xmm23 +0x62,0xa1,0xfe,0x08,0x2e,0xf7 + +# ATT: vucomxsd {sae}, %xmm23, %xmm22 +# INTEL: vucomxsd xmm22, xmm23, {sae} +0x62,0xa1,0xfe,0x18,0x2e,0xf7 + +# ATT: vucomxsd 268435456(%rbp,%r14,8), %xmm22 +# INTEL: vucomxsd xmm22, qword ptr [rbp + 8*r14 + 268435456] +0x62,0xa1,0xfe,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vucomxsd 291(%r8,%rax,4), %xmm22 +# INTEL: vucomxsd xmm22, qword ptr [r8 + 4*rax + 291] +0x62,0xc1,0xfe,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vucomxsd (%rip), %xmm22 +# INTEL: vucomxsd xmm22, qword ptr [rip] +0x62,0xe1,0xfe,0x08,0x2e,0x35,0x00,0x00,0x00,0x00 + +# ATT: vucomxsd -256(,%rbp,2), %xmm22 +# INTEL: vucomxsd xmm22, qword ptr [2*rbp - 256] +0x62,0xe1,0xfe,0x08,0x2e,0x34,0x6d,0x00,0xff,0xff,0xff + +# ATT: vucomxsd 1016(%rcx), %xmm22 +# INTEL: vucomxsd xmm22, qword ptr [rcx + 1016] +0x62,0xe1,0xfe,0x08,0x2e,0x71,0x7f + +# ATT: vucomxsd -1024(%rdx), %xmm22 +# INTEL: vucomxsd xmm22, qword ptr [rdx - 1024] +0x62,0xe1,0xfe,0x08,0x2e,0x72,0x80 + +# ATT: vucomxsh %xmm23, %xmm22 +# INTEL: vucomxsh xmm22, xmm23 +0x62,0xa5,0x7f,0x08,0x2e,0xf7 + +# ATT: vucomxsh {sae}, %xmm23, %xmm22 +# INTEL: vucomxsh xmm22, xmm23, {sae} +0x62,0xa5,0x7f,0x18,0x2e,0xf7 + +# ATT: vucomxsh 268435456(%rbp,%r14,8), %xmm22 +# INTEL: vucomxsh xmm22, word ptr [rbp + 8*r14 + 268435456] +0x62,0xa5,0x7f,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vucomxsh 291(%r8,%rax,4), %xmm22 +# INTEL: vucomxsh xmm22, word ptr [r8 + 4*rax + 291] +0x62,0xc5,0x7f,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vucomxsh (%rip), %xmm22 +# INTEL: vucomxsh xmm22, word ptr [rip] +0x62,0xe5,0x7f,0x08,0x2e,0x35,0x00,0x00,0x00,0x00 + +# ATT: vucomxsh -64(,%rbp,2), %xmm22 +# INTEL: vucomxsh xmm22, word ptr [2*rbp - 64] +0x62,0xe5,0x7f,0x08,0x2e,0x34,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vucomxsh 254(%rcx), %xmm22 +# INTEL: vucomxsh xmm22, word ptr [rcx + 254] 
+0x62,0xe5,0x7f,0x08,0x2e,0x71,0x7f + +# ATT: vucomxsh -256(%rdx), %xmm22 +# INTEL: vucomxsh xmm22, word ptr [rdx - 256] +0x62,0xe5,0x7f,0x08,0x2e,0x72,0x80 + +# ATT: vucomxss %xmm23, %xmm22 +# INTEL: vucomxss xmm22, xmm23 +0x62,0xa1,0x7f,0x08,0x2e,0xf7 + +# ATT: vucomxss {sae}, %xmm23, %xmm22 +# INTEL: vucomxss xmm22, xmm23, {sae} +0x62,0xa1,0x7f,0x18,0x2e,0xf7 + +# ATT: vucomxss 268435456(%rbp,%r14,8), %xmm22 +# INTEL: vucomxss xmm22, dword ptr [rbp + 8*r14 + 268435456] +0x62,0xa1,0x7f,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vucomxss 291(%r8,%rax,4), %xmm22 +# INTEL: vucomxss xmm22, dword ptr [r8 + 4*rax + 291] +0x62,0xc1,0x7f,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vucomxss (%rip), %xmm22 +# INTEL: vucomxss xmm22, dword ptr [rip] +0x62,0xe1,0x7f,0x08,0x2e,0x35,0x00,0x00,0x00,0x00 + +# ATT: vucomxss -128(,%rbp,2), %xmm22 +# INTEL: vucomxss xmm22, dword ptr [2*rbp - 128] +0x62,0xe1,0x7f,0x08,0x2e,0x34,0x6d,0x80,0xff,0xff,0xff + +# ATT: vucomxss 508(%rcx), %xmm22 +# INTEL: vucomxss xmm22, dword ptr [rcx + 508] +0x62,0xe1,0x7f,0x08,0x2e,0x71,0x7f + +# ATT: vucomxss -512(%rdx), %xmm22 +# INTEL: vucomxss xmm22, dword ptr [rdx - 512] +0x62,0xe1,0x7f,0x08,0x2e,0x72,0x80 + diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt new file mode 100644 index 000000000000..e86c2340a486 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt @@ -0,0 +1,34 @@ +# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: vmovd (%ecx), %xmm5 +# INTEL: vmovd xmm5, dword ptr [ecx] +0x62 0xf1 0x7e 0x08 0x7e 0x29 + +# ATT: vmovd %xmm5, (%ecx) +# INTEL: vmovd dword ptr [ecx], xmm5 +0x62 0xf1 0x7d 0x08 0xd6 0x29 + +# ATT: vmovd %xmm2, %xmm1 +# INTEL: vmovd xmm1, xmm2 +0x62 0xf1 0x7e 0x08 0x7e 0xca + +# ATT: vmovd %xmm2, %xmm1 +# INTEL: 
vmovd xmm1, xmm2 +0x62 0xf1 0x7d 0x08 0xd6 0xca + +# ATT: vmovw %xmm5, (%ecx) +# INTEL: vmovw dword ptr [ecx], xmm5 +0x62 0xf5 0x7e 0x08 0x7e 0x29 + +# ATT: vmovw (%ecx), %xmm5 +# INTEL: vmovw xmm5, word ptr [ecx] +0x62 0xf5 0x7e 0x08 0x6e 0x29 + +# ATT: vmovw %xmm2, %xmm1 +# INTEL: vmovw xmm1, xmm2 +0x62 0xf5 0x7e 0x08 0x6e 0xca + +# ATT: vmovw %xmm2, %xmm1 +# INTEL: vmovw xmm1, xmm2 +0x62 0xf5 0x7e 0x08 0x7e 0xca diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt new file mode 100644 index 000000000000..36ddd75a77ad --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt @@ -0,0 +1,34 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: vmovd (%rcx), %xmm29 +# INTEL: vmovd xmm29, dword ptr [rcx] +0x62 0x61 0x7e 0x08 0x7e 0x29 + +# ATT: vmovd %xmm29, (%rcx) +# INTEL: vmovd dword ptr [rcx], xmm29 +0x62 0x61 0x7d 0x08 0xd6 0x29 + +# ATT: vmovd %xmm22, %xmm21 +# INTEL: vmovd xmm21, xmm22 +0x62 0xa1 0x7e 0x08 0x7e 0xee + +# ATT: vmovd %xmm22, %xmm21 +# INTEL: vmovd xmm21, xmm22 +0x62 0xa1 0x7d 0x08 0xd6 0xee + +# ATT: vmovw %xmm29, (%rcx) +# INTEL: vmovw dword ptr [rcx], xmm29 +0x62 0x65 0x7e 0x08 0x7e 0x29 + +# ATT: vmovw (%rcx), %xmm29 +# INTEL: vmovw xmm29, word ptr [rcx] +0x62 0x65 0x7e 0x08 0x6e 0x29 + +# ATT: vmovw %xmm22, %xmm21 +# INTEL: vmovw xmm21, xmm22 +0x62 0xa5 0x7e 0x08 0x6e 0xee + +# ATT: vmovw %xmm22, %xmm21 +# INTEL: vmovw xmm21, xmm22 +0x62 0xa5 0x7e 0x08 0x7e 0xee diff --git a/llvm/test/MC/WebAssembly/annotations.s b/llvm/test/MC/WebAssembly/annotations.s index b1f97daccccd..3e727591afa9 100644 --- a/llvm/test/MC/WebAssembly/annotations.s +++ b/llvm/test/MC/WebAssembly/annotations.s @@ -33,6 +33,22 @@ test_annotation: rethrow 0 end_try end_try + + block exnref + block + block () -> (i32, exnref) + 
block i32 + try_table (catch __cpp_exception 0) (catch_ref __c_longjmp 1) (catch_all 2) (catch_all_ref 3) + end_try_table + return + end_block + return + end_block + return + end_block + return + end_block + drop end_function @@ -61,5 +77,24 @@ test_annotation: # CHECK-NEXT: rethrow 0 # to caller # CHECK-NEXT: end_try # label3: # CHECK-NEXT: end_try # label0: + +# CHECK: block exnref +# CHECK-NEXT: block +# CHECK-NEXT: block () -> (i32, exnref) +# CHECK-NEXT: block i32 +# CHECK-NEXT: try_table (catch __cpp_exception 0) (catch_ref __c_longjmp 1) (catch_all 2) (catch_all_ref 3) # 0: down to label10 +# CHECK-NEXT: # 1: down to label9 +# CHECK-NEXT: # 2: down to label8 +# CHECK-NEXT: # 3: down to label7 +# CHECK-NEXT: end_try_table # label11: +# CHECK-NEXT: return +# CHECK-NEXT: end_block # label10: +# CHECK-NEXT: return +# CHECK-NEXT: end_block # label9: +# CHECK-NEXT: return +# CHECK-NEXT: end_block # label8: +# CHECK-NEXT: return +# CHECK-NEXT: end_block # label7: +# CHECK-NEXT: drop # CHECK-NEXT: end_function diff --git a/llvm/test/MC/WebAssembly/eh-assembly.s b/llvm/test/MC/WebAssembly/eh-assembly.s index a769bc447d0b..b4d6b324d96e 100644 --- a/llvm/test/MC/WebAssembly/eh-assembly.s +++ b/llvm/test/MC/WebAssembly/eh-assembly.s @@ -1,4 +1,6 @@ # RUN: llvm-mc -triple=wasm32-unknown-unknown -mattr=+exception-handling --no-type-check < %s | FileCheck %s +# Check that it converts to .o without errors, but don't check any output: +# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj -mattr=+exception-handling --no-type-check -o %t.o < %s .tagtype __cpp_exception i32 .tagtype __c_longjmp i32 diff --git a/llvm/test/MC/X86/avx10.2-com-ef-32-att.s b/llvm/test/MC/X86/avx10.2-com-ef-32-att.s new file mode 100644 index 000000000000..8883bb3d6775 --- /dev/null +++ b/llvm/test/MC/X86/avx10.2-com-ef-32-att.s @@ -0,0 +1,194 @@ +// RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s + +// CHECK: vcomxsd %xmm3, %xmm2 +// CHECK: encoding: 
[0x62,0xf1,0xfe,0x08,0x2f,0xd3] + vcomxsd %xmm3, %xmm2 + +// CHECK: vcomxsd {sae}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf1,0xfe,0x18,0x2f,0xd3] + vcomxsd {sae}, %xmm3, %xmm2 + +// CHECK: vcomxsd 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10] + vcomxsd 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vcomxsd 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00] + vcomxsd 291(%edi,%eax,4), %xmm2 + +// CHECK: vcomxsd (%eax), %xmm2 +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x10] + vcomxsd (%eax), %xmm2 + +// CHECK: vcomxsd -256(,%ebp,2), %xmm2 +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x14,0x6d,0x00,0xff,0xff,0xff] + vcomxsd -256(,%ebp,2), %xmm2 + +// CHECK: vcomxsd 1016(%ecx), %xmm2 +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x51,0x7f] + vcomxsd 1016(%ecx), %xmm2 + +// CHECK: vcomxsd -1024(%edx), %xmm2 +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x52,0x80] + vcomxsd -1024(%edx), %xmm2 + +// CHECK: vcomxsh %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0xd3] + vcomxsh %xmm3, %xmm2 + +// CHECK: vcomxsh {sae}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x2f,0xd3] + vcomxsh {sae}, %xmm3, %xmm2 + +// CHECK: vcomxsh 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10] + vcomxsh 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vcomxsh 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00] + vcomxsh 291(%edi,%eax,4), %xmm2 + +// CHECK: vcomxsh (%eax), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x10] + vcomxsh (%eax), %xmm2 + +// CHECK: vcomxsh -64(,%ebp,2), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff] + vcomxsh -64(,%ebp,2), %xmm2 + +// CHECK: vcomxsh 254(%ecx), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x51,0x7f] + vcomxsh 254(%ecx), %xmm2 + +// CHECK: vcomxsh -256(%edx), 
%xmm2 +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x52,0x80] + vcomxsh -256(%edx), %xmm2 + +// CHECK: vcomxss %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0xd3] + vcomxss %xmm3, %xmm2 + +// CHECK: vcomxss {sae}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf1,0x7f,0x18,0x2f,0xd3] + vcomxss {sae}, %xmm3, %xmm2 + +// CHECK: vcomxss 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10] + vcomxss 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vcomxss 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00] + vcomxss 291(%edi,%eax,4), %xmm2 + +// CHECK: vcomxss (%eax), %xmm2 +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x10] + vcomxss (%eax), %xmm2 + +// CHECK: vcomxss -128(,%ebp,2), %xmm2 +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x14,0x6d,0x80,0xff,0xff,0xff] + vcomxss -128(,%ebp,2), %xmm2 + +// CHECK: vcomxss 508(%ecx), %xmm2 +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x51,0x7f] + vcomxss 508(%ecx), %xmm2 + +// CHECK: vcomxss -512(%edx), %xmm2 +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x52,0x80] + vcomxss -512(%edx), %xmm2 + +// CHECK: vucomxsd %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0xd3] + vucomxsd %xmm3, %xmm2 + +// CHECK: vucomxsd {sae}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf1,0xfe,0x18,0x2e,0xd3] + vucomxsd {sae}, %xmm3, %xmm2 + +// CHECK: vucomxsd 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10] + vucomxsd 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vucomxsd 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00] + vucomxsd 291(%edi,%eax,4), %xmm2 + +// CHECK: vucomxsd (%eax), %xmm2 +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x10] + vucomxsd (%eax), %xmm2 + +// CHECK: vucomxsd -256(,%ebp,2), %xmm2 +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x14,0x6d,0x00,0xff,0xff,0xff] + vucomxsd -256(,%ebp,2), %xmm2 + 
+// CHECK: vucomxsd 1016(%ecx), %xmm2 +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x51,0x7f] + vucomxsd 1016(%ecx), %xmm2 + +// CHECK: vucomxsd -1024(%edx), %xmm2 +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x52,0x80] + vucomxsd -1024(%edx), %xmm2 + +// CHECK: vucomxsh %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0xd3] + vucomxsh %xmm3, %xmm2 + +// CHECK: vucomxsh {sae}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x2e,0xd3] + vucomxsh {sae}, %xmm3, %xmm2 + +// CHECK: vucomxsh 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10] + vucomxsh 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vucomxsh 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00] + vucomxsh 291(%edi,%eax,4), %xmm2 + +// CHECK: vucomxsh (%eax), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x10] + vucomxsh (%eax), %xmm2 + +// CHECK: vucomxsh -64(,%ebp,2), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x14,0x6d,0xc0,0xff,0xff,0xff] + vucomxsh -64(,%ebp,2), %xmm2 + +// CHECK: vucomxsh 254(%ecx), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x51,0x7f] + vucomxsh 254(%ecx), %xmm2 + +// CHECK: vucomxsh -256(%edx), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x52,0x80] + vucomxsh -256(%edx), %xmm2 + +// CHECK: vucomxss %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0xd3] + vucomxss %xmm3, %xmm2 + +// CHECK: vucomxss {sae}, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf1,0x7f,0x18,0x2e,0xd3] + vucomxss {sae}, %xmm3, %xmm2 + +// CHECK: vucomxss 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10] + vucomxss 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vucomxss 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00] + vucomxss 291(%edi,%eax,4), %xmm2 + +// CHECK: vucomxss (%eax), %xmm2 +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x10] + 
vucomxss (%eax), %xmm2 + +// CHECK: vucomxss -128(,%ebp,2), %xmm2 +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x14,0x6d,0x80,0xff,0xff,0xff] + vucomxss -128(,%ebp,2), %xmm2 + +// CHECK: vucomxss 508(%ecx), %xmm2 +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x51,0x7f] + vucomxss 508(%ecx), %xmm2 + +// CHECK: vucomxss -512(%edx), %xmm2 +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x52,0x80] + vucomxss -512(%edx), %xmm2 + diff --git a/llvm/test/MC/X86/avx10.2-com-ef-32-intel.s b/llvm/test/MC/X86/avx10.2-com-ef-32-intel.s new file mode 100644 index 000000000000..9ff0484db133 --- /dev/null +++ b/llvm/test/MC/X86/avx10.2-com-ef-32-intel.s @@ -0,0 +1,194 @@ +// RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vcomxsd xmm2, xmm3 +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0xd3] + vcomxsd xmm2, xmm3 + +// CHECK: vcomxsd xmm2, xmm3, {sae} +// CHECK: encoding: [0x62,0xf1,0xfe,0x18,0x2f,0xd3] + vcomxsd xmm2, xmm3, {sae} + +// CHECK: vcomxsd xmm2, qword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10] + vcomxsd xmm2, qword ptr [esp + 8*esi + 268435456] + +// CHECK: vcomxsd xmm2, qword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00] + vcomxsd xmm2, qword ptr [edi + 4*eax + 291] + +// CHECK: vcomxsd xmm2, qword ptr [eax] +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x10] + vcomxsd xmm2, qword ptr [eax] + +// CHECK: vcomxsd xmm2, qword ptr [2*ebp - 256] +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x14,0x6d,0x00,0xff,0xff,0xff] + vcomxsd xmm2, qword ptr [2*ebp - 256] + +// CHECK: vcomxsd xmm2, qword ptr [ecx + 1016] +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x51,0x7f] + vcomxsd xmm2, qword ptr [ecx + 1016] + +// CHECK: vcomxsd xmm2, qword ptr [edx - 1024] +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x52,0x80] + vcomxsd xmm2, qword ptr [edx - 1024] + +// CHECK: vcomxsh xmm2, xmm3 
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0xd3] + vcomxsh xmm2, xmm3 + +// CHECK: vcomxsh xmm2, xmm3, {sae} +// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x2f,0xd3] + vcomxsh xmm2, xmm3, {sae} + +// CHECK: vcomxsh xmm2, word ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10] + vcomxsh xmm2, word ptr [esp + 8*esi + 268435456] + +// CHECK: vcomxsh xmm2, word ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00] + vcomxsh xmm2, word ptr [edi + 4*eax + 291] + +// CHECK: vcomxsh xmm2, word ptr [eax] +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x10] + vcomxsh xmm2, word ptr [eax] + +// CHECK: vcomxsh xmm2, word ptr [2*ebp - 64] +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff] + vcomxsh xmm2, word ptr [2*ebp - 64] + +// CHECK: vcomxsh xmm2, word ptr [ecx + 254] +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x51,0x7f] + vcomxsh xmm2, word ptr [ecx + 254] + +// CHECK: vcomxsh xmm2, word ptr [edx - 256] +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x52,0x80] + vcomxsh xmm2, word ptr [edx - 256] + +// CHECK: vcomxss xmm2, xmm3 +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0xd3] + vcomxss xmm2, xmm3 + +// CHECK: vcomxss xmm2, xmm3, {sae} +// CHECK: encoding: [0x62,0xf1,0x7f,0x18,0x2f,0xd3] + vcomxss xmm2, xmm3, {sae} + +// CHECK: vcomxss xmm2, dword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10] + vcomxss xmm2, dword ptr [esp + 8*esi + 268435456] + +// CHECK: vcomxss xmm2, dword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00] + vcomxss xmm2, dword ptr [edi + 4*eax + 291] + +// CHECK: vcomxss xmm2, dword ptr [eax] +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x10] + vcomxss xmm2, dword ptr [eax] + +// CHECK: vcomxss xmm2, dword ptr [2*ebp - 128] +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x14,0x6d,0x80,0xff,0xff,0xff] + 
vcomxss xmm2, dword ptr [2*ebp - 128] + +// CHECK: vcomxss xmm2, dword ptr [ecx + 508] +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x51,0x7f] + vcomxss xmm2, dword ptr [ecx + 508] + +// CHECK: vcomxss xmm2, dword ptr [edx - 512] +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x52,0x80] + vcomxss xmm2, dword ptr [edx - 512] + +// CHECK: vucomxsd xmm2, xmm3 +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0xd3] + vucomxsd xmm2, xmm3 + +// CHECK: vucomxsd xmm2, xmm3, {sae} +// CHECK: encoding: [0x62,0xf1,0xfe,0x18,0x2e,0xd3] + vucomxsd xmm2, xmm3, {sae} + +// CHECK: vucomxsd xmm2, qword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10] + vucomxsd xmm2, qword ptr [esp + 8*esi + 268435456] + +// CHECK: vucomxsd xmm2, qword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00] + vucomxsd xmm2, qword ptr [edi + 4*eax + 291] + +// CHECK: vucomxsd xmm2, qword ptr [eax] +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x10] + vucomxsd xmm2, qword ptr [eax] + +// CHECK: vucomxsd xmm2, qword ptr [2*ebp - 256] +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x14,0x6d,0x00,0xff,0xff,0xff] + vucomxsd xmm2, qword ptr [2*ebp - 256] + +// CHECK: vucomxsd xmm2, qword ptr [ecx + 1016] +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x51,0x7f] + vucomxsd xmm2, qword ptr [ecx + 1016] + +// CHECK: vucomxsd xmm2, qword ptr [edx - 1024] +// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x52,0x80] + vucomxsd xmm2, qword ptr [edx - 1024] + +// CHECK: vucomxsh xmm2, xmm3 +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0xd3] + vucomxsh xmm2, xmm3 + +// CHECK: vucomxsh xmm2, xmm3, {sae} +// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x2e,0xd3] + vucomxsh xmm2, xmm3, {sae} + +// CHECK: vucomxsh xmm2, word ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10] + vucomxsh xmm2, word ptr [esp + 8*esi + 268435456] + +// CHECK: vucomxsh xmm2, word ptr [edi 
+ 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00] + vucomxsh xmm2, word ptr [edi + 4*eax + 291] + +// CHECK: vucomxsh xmm2, word ptr [eax] +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x10] + vucomxsh xmm2, word ptr [eax] + +// CHECK: vucomxsh xmm2, word ptr [2*ebp - 64] +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x14,0x6d,0xc0,0xff,0xff,0xff] + vucomxsh xmm2, word ptr [2*ebp - 64] + +// CHECK: vucomxsh xmm2, word ptr [ecx + 254] +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x51,0x7f] + vucomxsh xmm2, word ptr [ecx + 254] + +// CHECK: vucomxsh xmm2, word ptr [edx - 256] +// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x52,0x80] + vucomxsh xmm2, word ptr [edx - 256] + +// CHECK: vucomxss xmm2, xmm3 +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0xd3] + vucomxss xmm2, xmm3 + +// CHECK: vucomxss xmm2, xmm3, {sae} +// CHECK: encoding: [0x62,0xf1,0x7f,0x18,0x2e,0xd3] + vucomxss xmm2, xmm3, {sae} + +// CHECK: vucomxss xmm2, dword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10] + vucomxss xmm2, dword ptr [esp + 8*esi + 268435456] + +// CHECK: vucomxss xmm2, dword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00] + vucomxss xmm2, dword ptr [edi + 4*eax + 291] + +// CHECK: vucomxss xmm2, dword ptr [eax] +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x10] + vucomxss xmm2, dword ptr [eax] + +// CHECK: vucomxss xmm2, dword ptr [2*ebp - 128] +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x14,0x6d,0x80,0xff,0xff,0xff] + vucomxss xmm2, dword ptr [2*ebp - 128] + +// CHECK: vucomxss xmm2, dword ptr [ecx + 508] +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x51,0x7f] + vucomxss xmm2, dword ptr [ecx + 508] + +// CHECK: vucomxss xmm2, dword ptr [edx - 512] +// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x52,0x80] + vucomxss xmm2, dword ptr [edx - 512] + diff --git a/llvm/test/MC/X86/avx10.2-com-ef-64-att.s 
b/llvm/test/MC/X86/avx10.2-com-ef-64-att.s new file mode 100644 index 000000000000..2f3690537334 --- /dev/null +++ b/llvm/test/MC/X86/avx10.2-com-ef-64-att.s @@ -0,0 +1,194 @@ +// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s + +// CHECK: vcomxsd %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2f,0xf7] + vcomxsd %xmm23, %xmm22 + +// CHECK: vcomxsd {sae}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa1,0xfe,0x18,0x2f,0xf7] + vcomxsd {sae}, %xmm23, %xmm22 + +// CHECK: vcomxsd 268435456(%rbp,%r14,8), %xmm22 +// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10] + vcomxsd 268435456(%rbp,%r14,8), %xmm22 + +// CHECK: vcomxsd 291(%r8,%rax,4), %xmm22 +// CHECK: encoding: [0x62,0xc1,0xfe,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00] + vcomxsd 291(%r8,%rax,4), %xmm22 + +// CHECK: vcomxsd (%rip), %xmm22 +// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x35,0x00,0x00,0x00,0x00] + vcomxsd (%rip), %xmm22 + +// CHECK: vcomxsd -256(,%rbp,2), %xmm22 +// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x34,0x6d,0x00,0xff,0xff,0xff] + vcomxsd -256(,%rbp,2), %xmm22 + +// CHECK: vcomxsd 1016(%rcx), %xmm22 +// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x71,0x7f] + vcomxsd 1016(%rcx), %xmm22 + +// CHECK: vcomxsd -1024(%rdx), %xmm22 +// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x72,0x80] + vcomxsd -1024(%rdx), %xmm22 + +// CHECK: vcomxsh %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2f,0xf7] + vcomxsh %xmm23, %xmm22 + +// CHECK: vcomxsh {sae}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa5,0x7f,0x18,0x2f,0xf7] + vcomxsh {sae}, %xmm23, %xmm22 + +// CHECK: vcomxsh 268435456(%rbp,%r14,8), %xmm22 +// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10] + vcomxsh 268435456(%rbp,%r14,8), %xmm22 + +// CHECK: vcomxsh 291(%r8,%rax,4), %xmm22 +// CHECK: encoding: [0x62,0xc5,0x7f,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00] + vcomxsh 291(%r8,%rax,4), %xmm22 + +// CHECK: vcomxsh (%rip), %xmm22 +// CHECK: encoding: 
[0x62,0xe5,0x7f,0x08,0x2f,0x35,0x00,0x00,0x00,0x00] + vcomxsh (%rip), %xmm22 + +// CHECK: vcomxsh -64(,%rbp,2), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff] + vcomxsh -64(,%rbp,2), %xmm22 + +// CHECK: vcomxsh 254(%rcx), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x71,0x7f] + vcomxsh 254(%rcx), %xmm22 + +// CHECK: vcomxsh -256(%rdx), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x72,0x80] + vcomxsh -256(%rdx), %xmm22 + +// CHECK: vcomxss %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2f,0xf7] + vcomxss %xmm23, %xmm22 + +// CHECK: vcomxss {sae}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa1,0x7f,0x18,0x2f,0xf7] + vcomxss {sae}, %xmm23, %xmm22 + +// CHECK: vcomxss 268435456(%rbp,%r14,8), %xmm22 +// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10] + vcomxss 268435456(%rbp,%r14,8), %xmm22 + +// CHECK: vcomxss 291(%r8,%rax,4), %xmm22 +// CHECK: encoding: [0x62,0xc1,0x7f,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00] + vcomxss 291(%r8,%rax,4), %xmm22 + +// CHECK: vcomxss (%rip), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x35,0x00,0x00,0x00,0x00] + vcomxss (%rip), %xmm22 + +// CHECK: vcomxss -128(,%rbp,2), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x34,0x6d,0x80,0xff,0xff,0xff] + vcomxss -128(,%rbp,2), %xmm22 + +// CHECK: vcomxss 508(%rcx), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x71,0x7f] + vcomxss 508(%rcx), %xmm22 + +// CHECK: vcomxss -512(%rdx), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x72,0x80] + vcomxss -512(%rdx), %xmm22 + +// CHECK: vucomxsd %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2e,0xf7] + vucomxsd %xmm23, %xmm22 + +// CHECK: vucomxsd {sae}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa1,0xfe,0x18,0x2e,0xf7] + vucomxsd {sae}, %xmm23, %xmm22 + +// CHECK: vucomxsd 268435456(%rbp,%r14,8), %xmm22 +// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vucomxsd 
268435456(%rbp,%r14,8), %xmm22 + +// CHECK: vucomxsd 291(%r8,%rax,4), %xmm22 +// CHECK: encoding: [0x62,0xc1,0xfe,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00] + vucomxsd 291(%r8,%rax,4), %xmm22 + +// CHECK: vucomxsd (%rip), %xmm22 +// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x35,0x00,0x00,0x00,0x00] + vucomxsd (%rip), %xmm22 + +// CHECK: vucomxsd -256(,%rbp,2), %xmm22 +// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x34,0x6d,0x00,0xff,0xff,0xff] + vucomxsd -256(,%rbp,2), %xmm22 + +// CHECK: vucomxsd 1016(%rcx), %xmm22 +// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x71,0x7f] + vucomxsd 1016(%rcx), %xmm22 + +// CHECK: vucomxsd -1024(%rdx), %xmm22 +// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x72,0x80] + vucomxsd -1024(%rdx), %xmm22 + +// CHECK: vucomxsh %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2e,0xf7] + vucomxsh %xmm23, %xmm22 + +// CHECK: vucomxsh {sae}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa5,0x7f,0x18,0x2e,0xf7] + vucomxsh {sae}, %xmm23, %xmm22 + +// CHECK: vucomxsh 268435456(%rbp,%r14,8), %xmm22 +// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vucomxsh 268435456(%rbp,%r14,8), %xmm22 + +// CHECK: vucomxsh 291(%r8,%rax,4), %xmm22 +// CHECK: encoding: [0x62,0xc5,0x7f,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00] + vucomxsh 291(%r8,%rax,4), %xmm22 + +// CHECK: vucomxsh (%rip), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x35,0x00,0x00,0x00,0x00] + vucomxsh (%rip), %xmm22 + +// CHECK: vucomxsh -64(,%rbp,2), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x34,0x6d,0xc0,0xff,0xff,0xff] + vucomxsh -64(,%rbp,2), %xmm22 + +// CHECK: vucomxsh 254(%rcx), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x71,0x7f] + vucomxsh 254(%rcx), %xmm22 + +// CHECK: vucomxsh -256(%rdx), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x72,0x80] + vucomxsh -256(%rdx), %xmm22 + +// CHECK: vucomxss %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2e,0xf7] + vucomxss %xmm23, %xmm22 + +// CHECK: 
vucomxss {sae}, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa1,0x7f,0x18,0x2e,0xf7] + vucomxss {sae}, %xmm23, %xmm22 + +// CHECK: vucomxss 268435456(%rbp,%r14,8), %xmm22 +// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vucomxss 268435456(%rbp,%r14,8), %xmm22 + +// CHECK: vucomxss 291(%r8,%rax,4), %xmm22 +// CHECK: encoding: [0x62,0xc1,0x7f,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00] + vucomxss 291(%r8,%rax,4), %xmm22 + +// CHECK: vucomxss (%rip), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x35,0x00,0x00,0x00,0x00] + vucomxss (%rip), %xmm22 + +// CHECK: vucomxss -128(,%rbp,2), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x34,0x6d,0x80,0xff,0xff,0xff] + vucomxss -128(,%rbp,2), %xmm22 + +// CHECK: vucomxss 508(%rcx), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x71,0x7f] + vucomxss 508(%rcx), %xmm22 + +// CHECK: vucomxss -512(%rdx), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x72,0x80] + vucomxss -512(%rdx), %xmm22 + diff --git a/llvm/test/MC/X86/avx10.2-com-ef-64-intel.s b/llvm/test/MC/X86/avx10.2-com-ef-64-intel.s new file mode 100644 index 000000000000..41aaf99270b8 --- /dev/null +++ b/llvm/test/MC/X86/avx10.2-com-ef-64-intel.s @@ -0,0 +1,194 @@ +// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vcomxsd xmm22, xmm23 +// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2f,0xf7] + vcomxsd xmm22, xmm23 + +// CHECK: vcomxsd xmm22, xmm23, {sae} +// CHECK: encoding: [0x62,0xa1,0xfe,0x18,0x2f,0xf7] + vcomxsd xmm22, xmm23, {sae} + +// CHECK: vcomxsd xmm22, qword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10] + vcomxsd xmm22, qword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vcomxsd xmm22, qword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc1,0xfe,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00] + vcomxsd xmm22, qword ptr [r8 + 4*rax + 291] + +// CHECK: vcomxsd xmm22, qword ptr [rip] 
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x35,0x00,0x00,0x00,0x00] + vcomxsd xmm22, qword ptr [rip] + +// CHECK: vcomxsd xmm22, qword ptr [2*rbp - 256] +// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x34,0x6d,0x00,0xff,0xff,0xff] + vcomxsd xmm22, qword ptr [2*rbp - 256] + +// CHECK: vcomxsd xmm22, qword ptr [rcx + 1016] +// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x71,0x7f] + vcomxsd xmm22, qword ptr [rcx + 1016] + +// CHECK: vcomxsd xmm22, qword ptr [rdx - 1024] +// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x72,0x80] + vcomxsd xmm22, qword ptr [rdx - 1024] + +// CHECK: vcomxsh xmm22, xmm23 +// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2f,0xf7] + vcomxsh xmm22, xmm23 + +// CHECK: vcomxsh xmm22, xmm23, {sae} +// CHECK: encoding: [0x62,0xa5,0x7f,0x18,0x2f,0xf7] + vcomxsh xmm22, xmm23, {sae} + +// CHECK: vcomxsh xmm22, word ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10] + vcomxsh xmm22, word ptr [rbp + 8*r14 + 268435456] + +// CHECK: vcomxsh xmm22, word ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x7f,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00] + vcomxsh xmm22, word ptr [r8 + 4*rax + 291] + +// CHECK: vcomxsh xmm22, word ptr [rip] +// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x35,0x00,0x00,0x00,0x00] + vcomxsh xmm22, word ptr [rip] + +// CHECK: vcomxsh xmm22, word ptr [2*rbp - 64] +// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff] + vcomxsh xmm22, word ptr [2*rbp - 64] + +// CHECK: vcomxsh xmm22, word ptr [rcx + 254] +// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x71,0x7f] + vcomxsh xmm22, word ptr [rcx + 254] + +// CHECK: vcomxsh xmm22, word ptr [rdx - 256] +// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x72,0x80] + vcomxsh xmm22, word ptr [rdx - 256] + +// CHECK: vcomxss xmm22, xmm23 +// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2f,0xf7] + vcomxss xmm22, xmm23 + +// CHECK: vcomxss xmm22, xmm23, {sae} +// CHECK: encoding: [0x62,0xa1,0x7f,0x18,0x2f,0xf7] + vcomxss 
xmm22, xmm23, {sae} + +// CHECK: vcomxss xmm22, dword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10] + vcomxss xmm22, dword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vcomxss xmm22, dword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc1,0x7f,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00] + vcomxss xmm22, dword ptr [r8 + 4*rax + 291] + +// CHECK: vcomxss xmm22, dword ptr [rip] +// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x35,0x00,0x00,0x00,0x00] + vcomxss xmm22, dword ptr [rip] + +// CHECK: vcomxss xmm22, dword ptr [2*rbp - 128] +// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x34,0x6d,0x80,0xff,0xff,0xff] + vcomxss xmm22, dword ptr [2*rbp - 128] + +// CHECK: vcomxss xmm22, dword ptr [rcx + 508] +// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x71,0x7f] + vcomxss xmm22, dword ptr [rcx + 508] + +// CHECK: vcomxss xmm22, dword ptr [rdx - 512] +// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x72,0x80] + vcomxss xmm22, dword ptr [rdx - 512] + +// CHECK: vucomxsd xmm22, xmm23 +// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2e,0xf7] + vucomxsd xmm22, xmm23 + +// CHECK: vucomxsd xmm22, xmm23, {sae} +// CHECK: encoding: [0x62,0xa1,0xfe,0x18,0x2e,0xf7] + vucomxsd xmm22, xmm23, {sae} + +// CHECK: vucomxsd xmm22, qword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vucomxsd xmm22, qword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vucomxsd xmm22, qword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc1,0xfe,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00] + vucomxsd xmm22, qword ptr [r8 + 4*rax + 291] + +// CHECK: vucomxsd xmm22, qword ptr [rip] +// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x35,0x00,0x00,0x00,0x00] + vucomxsd xmm22, qword ptr [rip] + +// CHECK: vucomxsd xmm22, qword ptr [2*rbp - 256] +// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x34,0x6d,0x00,0xff,0xff,0xff] + vucomxsd xmm22, qword ptr [2*rbp - 256] + +// CHECK: vucomxsd xmm22, qword ptr [rcx + 
1016] +// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x71,0x7f] + vucomxsd xmm22, qword ptr [rcx + 1016] + +// CHECK: vucomxsd xmm22, qword ptr [rdx - 1024] +// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x72,0x80] + vucomxsd xmm22, qword ptr [rdx - 1024] + +// CHECK: vucomxsh xmm22, xmm23 +// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2e,0xf7] + vucomxsh xmm22, xmm23 + +// CHECK: vucomxsh xmm22, xmm23, {sae} +// CHECK: encoding: [0x62,0xa5,0x7f,0x18,0x2e,0xf7] + vucomxsh xmm22, xmm23, {sae} + +// CHECK: vucomxsh xmm22, word ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vucomxsh xmm22, word ptr [rbp + 8*r14 + 268435456] + +// CHECK: vucomxsh xmm22, word ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x7f,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00] + vucomxsh xmm22, word ptr [r8 + 4*rax + 291] + +// CHECK: vucomxsh xmm22, word ptr [rip] +// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x35,0x00,0x00,0x00,0x00] + vucomxsh xmm22, word ptr [rip] + +// CHECK: vucomxsh xmm22, word ptr [2*rbp - 64] +// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x34,0x6d,0xc0,0xff,0xff,0xff] + vucomxsh xmm22, word ptr [2*rbp - 64] + +// CHECK: vucomxsh xmm22, word ptr [rcx + 254] +// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x71,0x7f] + vucomxsh xmm22, word ptr [rcx + 254] + +// CHECK: vucomxsh xmm22, word ptr [rdx - 256] +// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x72,0x80] + vucomxsh xmm22, word ptr [rdx - 256] + +// CHECK: vucomxss xmm22, xmm23 +// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2e,0xf7] + vucomxss xmm22, xmm23 + +// CHECK: vucomxss xmm22, xmm23, {sae} +// CHECK: encoding: [0x62,0xa1,0x7f,0x18,0x2e,0xf7] + vucomxss xmm22, xmm23, {sae} + +// CHECK: vucomxss xmm22, dword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vucomxss xmm22, dword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vucomxss xmm22, dword ptr [r8 + 4*rax + 291] +// CHECK: encoding: 
[0x62,0xc1,0x7f,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00] + vucomxss xmm22, dword ptr [r8 + 4*rax + 291] + +// CHECK: vucomxss xmm22, dword ptr [rip] +// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x35,0x00,0x00,0x00,0x00] + vucomxss xmm22, dword ptr [rip] + +// CHECK: vucomxss xmm22, dword ptr [2*rbp - 128] +// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x34,0x6d,0x80,0xff,0xff,0xff] + vucomxss xmm22, dword ptr [2*rbp - 128] + +// CHECK: vucomxss xmm22, dword ptr [rcx + 508] +// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x71,0x7f] + vucomxss xmm22, dword ptr [rcx + 508] + +// CHECK: vucomxss xmm22, dword ptr [rdx - 512] +// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x72,0x80] + vucomxss xmm22, dword ptr [rdx - 512] + diff --git a/llvm/test/MC/X86/avx10.2-copy-32-att.s b/llvm/test/MC/X86/avx10.2-copy-32-att.s new file mode 100644 index 000000000000..2bc498720849 --- /dev/null +++ b/llvm/test/MC/X86/avx10.2-copy-32-att.s @@ -0,0 +1,82 @@ +// RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s + +// CHECK: vmovd 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10] + vmovd 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vmovd 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0x87,0x23,0x01,0x00,0x00] + vmovd 291(%edi,%eax,4), %xmm2 + +// CHECK: vmovd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x6e,0x10] + vmovd (%eax), %xmm2 + +// CHECK: vmovd -128(,%ebp,2), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x6e,0x14,0x6d,0x80,0xff,0xff,0xff] + vmovd -128(,%ebp,2), %xmm2 + +// CHECK: vmovd %xmm3, 268435456(%esp,%esi,8) +// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10] + vmovd %xmm3, 268435456(%esp,%esi,8) + +// CHECK: vmovd %xmm3, 291(%edi,%eax,4) +// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00] + vmovd %xmm3, 291(%edi,%eax,4) + +// CHECK: vmovd %xmm3, (%eax) +// CHECK: encoding: [0xc5,0xf9,0x7e,0x18] + vmovd %xmm3, (%eax) + +// CHECK: vmovd %xmm3, -128(,%ebp,2) +// CHECK: 
encoding: [0xc5,0xf9,0x7e,0x1c,0x6d,0x80,0xff,0xff,0xff] + vmovd %xmm3, -128(,%ebp,2) + +// CHECK: vmovw 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10] + vmovw 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vmovw 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0x87,0x23,0x01,0x00,0x00] + vmovw 291(%edi,%eax,4), %xmm2 + +// CHECK: vmovw (%eax), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x10] + vmovw (%eax), %xmm2 + +// CHECK: vmovw -64(,%ebp,2), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x14,0x6d,0xc0,0xff,0xff,0xff] + vmovw -64(,%ebp,2), %xmm2 + +// CHECK: vmovw 254(%ecx), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x51,0x7f] + vmovw 254(%ecx), %xmm2 + +// CHECK: vmovw -256(%edx), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x52,0x80] + vmovw -256(%edx), %xmm2 + +// CHECK: vmovw %xmm3, 268435456(%esp,%esi,8) +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10] + vmovw %xmm3, 268435456(%esp,%esi,8) + +// CHECK: vmovw %xmm3, 291(%edi,%eax,4) +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00] + vmovw %xmm3, 291(%edi,%eax,4) + +// CHECK: vmovw %xmm3, (%eax) +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x18] + vmovw %xmm3, (%eax) + +// CHECK: vmovw %xmm3, -64(,%ebp,2) +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x1c,0x6d,0xc0,0xff,0xff,0xff] + vmovw %xmm3, -64(,%ebp,2) + +// CHECK: vmovw %xmm3, 254(%ecx) +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x59,0x7f] + vmovw %xmm3, 254(%ecx) + +// CHECK: vmovw %xmm3, -256(%edx) +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x5a,0x80] + vmovw %xmm3, -256(%edx) + diff --git a/llvm/test/MC/X86/avx10.2-copy-32-intel.s b/llvm/test/MC/X86/avx10.2-copy-32-intel.s new file mode 100644 index 000000000000..aa84548e5f75 --- /dev/null +++ b/llvm/test/MC/X86/avx10.2-copy-32-intel.s @@ -0,0 +1,81 @@ +// RUN: llvm-mc -triple i386 -x86-asm-syntax=intel 
-output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vmovd xmm2, dword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10] + vmovd xmm2, dword ptr [esp + 8*esi + 268435456] + +// CHECK: vmovd xmm2, dword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0x87,0x23,0x01,0x00,0x00] + vmovd xmm2, dword ptr [edi + 4*eax + 291] + +// CHECK: vmovd xmm2, dword ptr [eax] +// CHECK: encoding: [0xc5,0xf9,0x6e,0x10] + vmovd xmm2, dword ptr [eax] + +// CHECK: vmovd xmm2, dword ptr [2*ebp - 128] +// CHECK: encoding: [0xc5,0xf9,0x6e,0x14,0x6d,0x80,0xff,0xff,0xff] + vmovd xmm2, dword ptr [2*ebp - 128] + +// CHECK: vmovd dword ptr [esp + 8*esi + 268435456], xmm3 +// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10] + vmovd dword ptr [esp + 8*esi + 268435456], xmm3 + +// CHECK: vmovd dword ptr [edi + 4*eax + 291], xmm3 +// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00] + vmovd dword ptr [edi + 4*eax + 291], xmm3 + +// CHECK: vmovd dword ptr [eax], xmm3 +// CHECK: encoding: [0xc5,0xf9,0x7e,0x18] + vmovd dword ptr [eax], xmm3 + +// CHECK: vmovd dword ptr [2*ebp - 128], xmm3 +// CHECK: encoding: [0xc5,0xf9,0x7e,0x1c,0x6d,0x80,0xff,0xff,0xff] + vmovd dword ptr [2*ebp - 128], xmm3 + +// CHECK: vmovw xmm2, word ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10] + vmovw xmm2, word ptr [esp + 8*esi + 268435456] + +// CHECK: vmovw xmm2, word ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0x87,0x23,0x01,0x00,0x00] + vmovw xmm2, word ptr [edi + 4*eax + 291] + +// CHECK: vmovw xmm2, word ptr [eax] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x10] + vmovw xmm2, word ptr [eax] + +// CHECK: vmovw xmm2, word ptr [2*ebp - 64] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x14,0x6d,0xc0,0xff,0xff,0xff] + vmovw xmm2, word ptr [2*ebp - 64] + +// CHECK: vmovw xmm2, word ptr [ecx + 254] +// CHECK: encoding: 
[0x62,0xf5,0x7d,0x08,0x6e,0x51,0x7f] + vmovw xmm2, word ptr [ecx + 254] + +// CHECK: vmovw xmm2, word ptr [edx - 256] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x52,0x80] + vmovw xmm2, word ptr [edx - 256] + +// CHECK: vmovw word ptr [esp + 8*esi + 268435456], xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10] + vmovw word ptr [esp + 8*esi + 268435456], xmm3 + +// CHECK: vmovw word ptr [edi + 4*eax + 291], xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00] + vmovw word ptr [edi + 4*eax + 291], xmm3 + +// CHECK: vmovw word ptr [eax], xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x18] + vmovw word ptr [eax], xmm3 + +// CHECK: vmovw word ptr [2*ebp - 64], xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x1c,0x6d,0xc0,0xff,0xff,0xff] + vmovw word ptr [2*ebp - 64], xmm3 + +// CHECK: vmovw word ptr [ecx + 254], xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x59,0x7f] + vmovw word ptr [ecx + 254], xmm3 + +// CHECK: vmovw word ptr [edx - 256], xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x5a,0x80] + vmovw word ptr [edx - 256], xmm3 diff --git a/llvm/test/MC/X86/avx10.2-copy-64-att.s b/llvm/test/MC/X86/avx10.2-copy-64-att.s new file mode 100644 index 000000000000..a672b2d84224 --- /dev/null +++ b/llvm/test/MC/X86/avx10.2-copy-64-att.s @@ -0,0 +1,97 @@ +// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s + +// CHECK: vmovd 268435456(%rbp,%r14,8), %xmm22 +// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmovd 268435456(%rbp,%r14,8), %xmm22 + +// CHECK: vmovd 291(%r8,%rax,4), %xmm22 +// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00] + vmovd 291(%r8,%rax,4), %xmm22 + +// CHECK: vmovd (%rip), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00] + vmovd (%rip), %xmm22 + +// CHECK: vmovd -128(,%rbp,2), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x34,0x6d,0x80,0xff,0xff,0xff] + vmovd 
-128(,%rbp,2), %xmm22 + +// CHECK: vmovd 508(%rcx), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x71,0x7f] + vmovd 508(%rcx), %xmm22 + +// CHECK: vmovd -512(%rdx), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x72,0x80] + vmovd -512(%rdx), %xmm22 + +// CHECK: vmovd %xmm23, 268435456(%rbp,%r14,8) +// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10] + vmovd %xmm23, 268435456(%rbp,%r14,8) + +// CHECK: vmovd %xmm23, 291(%r8,%rax,4) +// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00] + vmovd %xmm23, 291(%r8,%rax,4) + +// CHECK: vmovd %xmm23, (%rip) +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00] + vmovd %xmm23, (%rip) + +// CHECK: vmovd %xmm23, -128(,%rbp,2) +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3c,0x6d,0x80,0xff,0xff,0xff] + vmovd %xmm23, -128(,%rbp,2) + +// CHECK: vmovd %xmm23, 508(%rcx) +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x79,0x7f] + vmovd %xmm23, 508(%rcx) + +// CHECK: vmovd %xmm23, -512(%rdx) +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x7a,0x80] + vmovd %xmm23, -512(%rdx) + +// CHECK: vmovw 268435456(%rbp,%r14,8), %xmm22 +// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmovw 268435456(%rbp,%r14,8), %xmm22 + +// CHECK: vmovw 291(%r8,%rax,4), %xmm22 +// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00] + vmovw 291(%r8,%rax,4), %xmm22 + +// CHECK: vmovw (%rip), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00] + vmovw (%rip), %xmm22 + +// CHECK: vmovw -64(,%rbp,2), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x34,0x6d,0xc0,0xff,0xff,0xff] + vmovw -64(,%rbp,2), %xmm22 + +// CHECK: vmovw 254(%rcx), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x71,0x7f] + vmovw 254(%rcx), %xmm22 + +// CHECK: vmovw -256(%rdx), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x72,0x80] + vmovw -256(%rdx), %xmm22 + +// CHECK: vmovw %xmm23, 268435456(%rbp,%r14,8) 
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10] + vmovw %xmm23, 268435456(%rbp,%r14,8) + +// CHECK: vmovw %xmm23, 291(%r8,%rax,4) +// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00] + vmovw %xmm23, 291(%r8,%rax,4) + +// CHECK: vmovw %xmm23, (%rip) +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00] + vmovw %xmm23, (%rip) + +// CHECK: vmovw %xmm23, -64(,%rbp,2) +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3c,0x6d,0xc0,0xff,0xff,0xff] + vmovw %xmm23, -64(,%rbp,2) + +// CHECK: vmovw %xmm23, 254(%rcx) +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x79,0x7f] + vmovw %xmm23, 254(%rcx) + +// CHECK: vmovw %xmm23, -256(%rdx) +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x7a,0x80] + vmovw %xmm23, -256(%rdx) diff --git a/llvm/test/MC/X86/avx10.2-copy-64-intel.s b/llvm/test/MC/X86/avx10.2-copy-64-intel.s new file mode 100644 index 000000000000..4fd7b67dfa5d --- /dev/null +++ b/llvm/test/MC/X86/avx10.2-copy-64-intel.s @@ -0,0 +1,97 @@ +// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vmovd xmm22, dword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmovd xmm22, dword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vmovd xmm22, dword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00] + vmovd xmm22, dword ptr [r8 + 4*rax + 291] + +// CHECK: vmovd xmm22, dword ptr [rip] +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00] + vmovd xmm22, dword ptr [rip] + +// CHECK: vmovd xmm22, dword ptr [2*rbp - 128] +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x34,0x6d,0x80,0xff,0xff,0xff] + vmovd xmm22, dword ptr [2*rbp - 128] + +// CHECK: vmovd xmm22, dword ptr [rcx + 508] +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x71,0x7f] + vmovd xmm22, dword ptr [rcx + 508] + +// CHECK: vmovd xmm22, dword ptr [rdx - 
512] +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x72,0x80] + vmovd xmm22, dword ptr [rdx - 512] + +// CHECK: vmovd dword ptr [rbp + 8*r14 + 268435456], xmm23 +// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10] + vmovd dword ptr [rbp + 8*r14 + 268435456], xmm23 + +// CHECK: vmovd dword ptr [r8 + 4*rax + 291], xmm23 +// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00] + vmovd dword ptr [r8 + 4*rax + 291], xmm23 + +// CHECK: vmovd dword ptr [rip], xmm23 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00] + vmovd dword ptr [rip], xmm23 + +// CHECK: vmovd dword ptr [2*rbp - 128], xmm23 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3c,0x6d,0x80,0xff,0xff,0xff] + vmovd dword ptr [2*rbp - 128], xmm23 + +// CHECK: vmovd dword ptr [rcx + 508], xmm23 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x79,0x7f] + vmovd dword ptr [rcx + 508], xmm23 + +// CHECK: vmovd dword ptr [rdx - 512], xmm23 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x7a,0x80] + vmovd dword ptr [rdx - 512], xmm23 + +// CHECK: vmovw xmm22, word ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmovw xmm22, word ptr [rbp + 8*r14 + 268435456] + +// CHECK: vmovw xmm22, word ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00] + vmovw xmm22, word ptr [r8 + 4*rax + 291] + +// CHECK: vmovw xmm22, word ptr [rip] +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00] + vmovw xmm22, word ptr [rip] + +// CHECK: vmovw xmm22, word ptr [2*rbp - 64] +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x34,0x6d,0xc0,0xff,0xff,0xff] + vmovw xmm22, word ptr [2*rbp - 64] + +// CHECK: vmovw xmm22, word ptr [rcx + 254] +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x71,0x7f] + vmovw xmm22, word ptr [rcx + 254] + +// CHECK: vmovw xmm22, word ptr [rdx - 256] +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x72,0x80] + vmovw xmm22, 
word ptr [rdx - 256] + +// CHECK: vmovw word ptr [rbp + 8*r14 + 268435456], xmm23 +// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10] + vmovw word ptr [rbp + 8*r14 + 268435456], xmm23 + +// CHECK: vmovw word ptr [r8 + 4*rax + 291], xmm23 +// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00] + vmovw word ptr [r8 + 4*rax + 291], xmm23 + +// CHECK: vmovw word ptr [rip], xmm23 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00] + vmovw word ptr [rip], xmm23 + +// CHECK: vmovw word ptr [2*rbp - 64], xmm23 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3c,0x6d,0xc0,0xff,0xff,0xff] + vmovw word ptr [2*rbp - 64], xmm23 + +// CHECK: vmovw word ptr [rcx + 254], xmm23 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x79,0x7f] + vmovw word ptr [rcx + 254], xmm23 + +// CHECK: vmovw word ptr [rdx - 256], xmm23 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x7a,0x80] + vmovw word ptr [rdx - 256], xmm23 diff --git a/llvm/test/MachineVerifier/test_g_insert_subvector.mir b/llvm/test/MachineVerifier/test_g_insert_subvector.mir index 9fce3c3e842d..62ddd28919b2 100644 --- a/llvm/test/MachineVerifier/test_g_insert_subvector.mir +++ b/llvm/test/MachineVerifier/test_g_insert_subvector.mir @@ -41,4 +41,7 @@ body: | ; CHECK: Index must be a multiple of the second source vector's minimum vector length %13:_(<vscale x 4 x s32>) = G_INSERT_SUBVECTOR %12, %1, 3 + + ; CHECK: Index must be a multiple of the second source vector's minimum vector length + %13:_(<vscale x 4 x s32>) = G_INSERT_SUBVECTOR %12, %1, 1 ... 
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index e85708ac1cc4..85d9b02ac0cb 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -1178,6 +1178,9 @@ static const X86FoldTableEntry Table1[] = { {X86::VCOMISSrr_Int, X86::VCOMISSrm_Int, TB_NO_REVERSE}, {X86::VCOMSBF16Zrr, X86::VCOMSBF16Zrm, 0}, {X86::VCOMSBF16Zrr_Int, X86::VCOMSBF16Zrm_Int, TB_NO_REVERSE}, + {X86::VCOMXSDZrr_Int, X86::VCOMXSDZrm_Int, TB_NO_REVERSE}, + {X86::VCOMXSHZrr_Int, X86::VCOMXSHZrm_Int, TB_NO_REVERSE}, + {X86::VCOMXSSZrr_Int, X86::VCOMXSSZrm_Int, TB_NO_REVERSE}, {X86::VCVTDQ2PDYrr, X86::VCVTDQ2PDYrm, 0}, {X86::VCVTDQ2PDZ128rr, X86::VCVTDQ2PDZ128rm, TB_NO_REVERSE}, {X86::VCVTDQ2PDZ256rr, X86::VCVTDQ2PDZ256rm, 0}, @@ -1614,8 +1617,10 @@ static const X86FoldTableEntry Table1[] = { {X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0}, {X86::VMOVUPSrr, X86::VMOVUPSrm, 0}, {X86::VMOVW2SHrr, X86::VMOVWrm, TB_NO_REVERSE}, + {X86::VMOVZPDILo2PDIZrr, X86::VMOVZPDILo2PDIZrm, TB_NO_REVERSE}, {X86::VMOVZPQILo2PQIZrr, X86::VMOVQI2PQIZrm, TB_NO_REVERSE}, {X86::VMOVZPQILo2PQIrr, X86::VMOVQI2PQIrm, TB_NO_REVERSE}, + {X86::VMOVZPWILo2PWIZrr, X86::VMOVZPWILo2PWIZrm, TB_NO_REVERSE}, {X86::VPABSBYrr, X86::VPABSBYrm, 0}, {X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0}, {X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0}, @@ -1954,6 +1959,9 @@ static const X86FoldTableEntry Table1[] = { {X86::VUCOMISSZrr_Int, X86::VUCOMISSZrm_Int, TB_NO_REVERSE}, {X86::VUCOMISSrr, X86::VUCOMISSrm, 0}, {X86::VUCOMISSrr_Int, X86::VUCOMISSrm_Int, TB_NO_REVERSE}, + {X86::VUCOMXSDZrr_Int, X86::VUCOMXSDZrm_Int, TB_NO_REVERSE}, + {X86::VUCOMXSHZrr_Int, X86::VUCOMXSHZrm_Int, TB_NO_REVERSE}, + {X86::VUCOMXSSZrr_Int, X86::VUCOMXSSZrm_Int, TB_NO_REVERSE}, {X86::XOR16ri8_ND, X86::XOR16mi8_ND, 0}, {X86::XOR16ri8_NF_ND, X86::XOR16mi8_NF_ND, 0}, {X86::XOR16ri_ND, X86::XOR16mi_ND, 0}, diff --git a/llvm/test/ThinLTO/X86/ctxprof.ll b/llvm/test/ThinLTO/X86/ctxprof.ll index 
1e30b90ec23d..4baea3b25890 100644 --- a/llvm/test/ThinLTO/X86/ctxprof.ll +++ b/llvm/test/ThinLTO/X86/ctxprof.ll @@ -47,18 +47,21 @@ ; NOPROF-1-NOT: m2_f1() ; NOPROF-2-NOT: m1_f1() ; -; The run with workload definitions - same other options. +; The run with workload definitions - same other options. We do need to re-generate the .bc +; files, to include instrumentation. +; RUN: opt -module-summary -passes=assign-guid,ctx-instr-gen %t/m1.ll -o %t/m1-instr.bc +; RUN: opt -module-summary -passes=assign-guid,ctx-instr-gen %t/m2.ll -o %t/m2-instr.bc ; ; RUN: echo '[ \ ; RUN: {"Guid": 6019442868614718803, "Counters": [1], "Callsites": [[{"Guid": 15593096274670919754, "Counters": [1]}]]}, \ ; RUN: {"Guid": 15593096274670919754, "Counters": [1], "Callsites": [[{"Guid": 6019442868614718803, "Counters": [1]}]]} \ ; RUN: ]' > %t_exp/ctxprof.json ; RUN: llvm-ctxprof-util fromJSON --input %t_exp/ctxprof.json --output %t_exp/ctxprof.bitstream -; RUN: llvm-lto2 run %t/m1.bc %t/m2.bc \ +; RUN: llvm-lto2 run %t/m1-instr.bc %t/m2-instr.bc \ ; RUN: -o %t_exp/result.o -save-temps \ ; RUN: -use-ctx-profile=%t_exp/ctxprof.bitstream \ -; RUN: -r %t/m1.bc,m1_f1,plx \ -; RUN: -r %t/m2.bc,m2_f1,plx +; RUN: -r %t/m1-instr.bc,m1_f1,plx \ +; RUN: -r %t/m2-instr.bc,m2_f1,plx ; RUN: llvm-dis %t_exp/result.o.1.3.import.bc -o - | FileCheck %s --check-prefix=FIRST ; RUN: llvm-dis %t_exp/result.o.2.3.import.bc -o - | FileCheck %s --check-prefix=SECOND ; diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll index bc0d7a509e1f..40c512c81f0c 100644 --- a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll @@ -1106,6 +1106,15 @@ declare void @__cxa_throw(ptr, ptr, ptr) ; CHECK: declare void @_ZSt9terminatev() [[NOFREE_COLD_NORETURN:#[0-9]+]] declare void @_ZSt9terminatev() +; CHECK: declare void @sincos(double, ptr nocapture writeonly, ptr nocapture writeonly) 
[[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] +declare void @sincos(double, ptr, ptr) + +; CHECK: declare void @sincosf(float, ptr nocapture writeonly, ptr nocapture writeonly) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] +declare void @sincosf(float, ptr, ptr) + +; CHECK: declare void @sincosl(x86_fp80, ptr nocapture writeonly, ptr nocapture writeonly) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] +declare void @sincosl(x86_fp80, ptr, ptr) + ; memset_pattern{4,8,16} aren't available everywhere. ; CHECK-DARWIN: declare void @memset_pattern4(ptr nocapture writeonly, ptr nocapture readonly, i64) [[ARGMEMONLY_NOFREE_NOUNWIND_WILLRETURN]] declare void @memset_pattern4(ptr, ptr, i64) diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll index f3a3b8c1dc5d..fabf8ab51764 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -1161,6 +1161,85 @@ define <2 x half> @constant_rtz_pkrtz() { ret <2 x half> %cvt } +define <2 x half> @fpext_const_cvt_pkrtz(half %x) { +; CHECK-LABEL: @fpext_const_cvt_pkrtz( +; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> <half poison, half 0xH4200>, half [[X:%.*]], i64 0 +; CHECK-NEXT: ret <2 x half> [[CVT]] +; + %ext = fpext half %x to float + %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %ext, float 3.0) + ret <2 x half> %cvt +} + +define <2 x half> @const_fpext_cvt_pkrtz(half %y) { +; CHECK-LABEL: @const_fpext_cvt_pkrtz( +; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> <half 0xH4500, half poison>, half [[Y:%.*]], i64 1 +; CHECK-NEXT: ret <2 x half> [[CVT]] +; + %ext = fpext half %y to float + %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 5.0, float %ext) + ret <2 x half> %cvt +} + +define <2 x half> @const_fpext_multi_cvt_pkrtz(half %y) { +; CHECK-LABEL: @const_fpext_multi_cvt_pkrtz( +; CHECK-NEXT: [[CVT1:%.*]] = insertelement <2 x half> <half 0xH4500, half poison>, 
half [[Y:%.*]], i64 1 +; CHECK-NEXT: [[CVT2:%.*]] = insertelement <2 x half> <half 0xH4200, half poison>, half [[Y]], i64 1 +; CHECK-NEXT: [[ADD:%.*]] = fadd <2 x half> [[CVT1]], [[CVT2]] +; CHECK-NEXT: ret <2 x half> [[ADD]] +; + %ext = fpext half %y to float + %cvt1 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 5.0, float %ext) + %cvt2 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 3.0, float %ext) + %add = fadd <2 x half> %cvt1, %cvt2 + ret <2 x half> %add +} + +define <2 x half> @fpext_fpext_cvt_pkrtz(half %x, half %y) { +; CHECK-LABEL: @fpext_fpext_cvt_pkrtz( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> poison, half [[X:%.*]], i64 0 +; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> [[TMP1]], half [[Y:%.*]], i64 1 +; CHECK-NEXT: ret <2 x half> [[CVT]] +; + %extx = fpext half %x to float + %exty = fpext half %y to float + %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %extx, float %exty) + ret <2 x half> %cvt +} + +define <2 x half> @fpext_fpext_bf16_cvt_pkrtz(bfloat %x, bfloat %y) { +; CHECK-LABEL: @fpext_fpext_bf16_cvt_pkrtz( +; CHECK-NEXT: [[EXTX:%.*]] = fpext bfloat [[X:%.*]] to float +; CHECK-NEXT: [[EXTY:%.*]] = fpext bfloat [[Y:%.*]] to float +; CHECK-NEXT: [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[EXTX]], float [[EXTY]]) +; CHECK-NEXT: ret <2 x half> [[CVT]] +; + %extx = fpext bfloat %x to float + %exty = fpext bfloat %y to float + %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %extx, float %exty) + ret <2 x half> %cvt +} + +define <2 x half> @poison_fpext_cvt_pkrtz(half %y) { +; CHECK-LABEL: @poison_fpext_cvt_pkrtz( +; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> poison, half [[Y:%.*]], i64 1 +; CHECK-NEXT: ret <2 x half> [[CVT]] +; + %ext = fpext half %y to float + %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float poison, float %ext) + ret <2 x half> %cvt +} + +define <2 x half> @fpext_poison_cvt_pkrtz(half %x) { +; CHECK-LABEL: @fpext_poison_cvt_pkrtz( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x 
half> poison, half [[X:%.*]], i64 0 +; CHECK-NEXT: ret <2 x half> [[TMP1]] +; + %ext = fpext half %x to float + %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %ext, float poison) + ret <2 x half> %cvt +} + ; -------------------------------------------------------------------- ; llvm.amdgcn.cvt.pknorm.i16 ; -------------------------------------------------------------------- diff --git a/llvm/test/Transforms/InstCombine/bitcast.ll b/llvm/test/Transforms/InstCombine/bitcast.ll index 4ab24ce7b925..79e6370b7242 100644 --- a/llvm/test/Transforms/InstCombine/bitcast.ll +++ b/llvm/test/Transforms/InstCombine/bitcast.ll @@ -879,3 +879,26 @@ define half @copysign_idiom_constant_wrong_type2(bfloat %x, i16 %mag) { %y = bitcast i16 %res to half ret half %y } + +define i16 @bitcast_undef_to_vector() { +; CHECK-LABEL: @bitcast_undef_to_vector( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: unreachable: +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: ret i16 undef +; +entry: + br label %end + +unreachable: ; No predecessors! 
+ %0 = extractvalue { i32, i32 } zeroinitializer, 1 + br label %end + +end: ; preds = %unreachable, %entry + %1 = phi i32 [ %0, %unreachable ], [ undef, %entry ] + %2 = bitcast i32 %1 to <2 x i16> + %3 = extractelement <2 x i16> %2, i64 0 + ret i16 %3 +} diff --git a/llvm/test/Transforms/InstCombine/fmod.ll b/llvm/test/Transforms/InstCombine/fmod.ll index c021d27e95fa..10cff189b8df 100644 --- a/llvm/test/Transforms/InstCombine/fmod.ll +++ b/llvm/test/Transforms/InstCombine/fmod.ll @@ -9,7 +9,7 @@ define float @test_inf_const(float %f) { ; CHECK-NEXT: [[ISINF:%.*]] = fcmp oeq float [[ABS]], 0x7FF0000000000000 ; CHECK-NEXT: br i1 [[ISINF]], label [[RETURN:%.*]], label [[IF_END:%.*]] ; CHECK: if.end: -; CHECK-NEXT: [[CALL:%.*]] = tail call float @fmodf(float [[F]], float 2.000000e+00) +; CHECK-NEXT: [[CALL:%.*]] = frem nnan float [[F]], 2.000000e+00 ; CHECK-NEXT: ret float [[CALL]] ; CHECK: return: ; CHECK-NEXT: ret float 0.000000e+00 @@ -34,7 +34,7 @@ define float @test_const_zero(float %f) { ; CHECK-NEXT: [[ISZERO:%.*]] = fcmp oeq float [[F]], 0.000000e+00 ; CHECK-NEXT: br i1 [[ISZERO]], label [[RETURN:%.*]], label [[IF_END:%.*]] ; CHECK: if.end: -; CHECK-NEXT: [[CALL:%.*]] = tail call float @fmodf(float 2.000000e+00, float [[F]]) +; CHECK-NEXT: [[CALL:%.*]] = frem nnan float 2.000000e+00, [[F]] ; CHECK-NEXT: ret float [[CALL]] ; CHECK: return: ; CHECK-NEXT: ret float 0.000000e+00 @@ -67,11 +67,11 @@ define float @test_noinf_nozero(float nofpclass(inf) %f, float nofpclass(zero) % ; CHECK-LABEL: define float @test_noinf_nozero( ; CHECK-SAME: float nofpclass(inf) [[F:%.*]], float nofpclass(zero) [[G:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call nnan float @fmodf(float [[F]], float [[G]]) +; CHECK-NEXT: [[CALL:%.*]] = frem nnan float [[F]], [[G]] ; CHECK-NEXT: ret float [[CALL]] ; entry: - %call = tail call nnan float @fmodf(float %f, float %g) + %call = tail call float @fmodf(float %f, float %g) ret float %call } @@ -79,7 +79,7 @@ define double 
@test_double(double nofpclass(inf) %f, double nofpclass(zero) %g) ; CHECK-LABEL: define double @test_double( ; CHECK-SAME: double nofpclass(inf) [[F:%.*]], double nofpclass(zero) [[G:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call double @fmod(double [[F]], double [[G]]) +; CHECK-NEXT: [[CALL:%.*]] = frem nnan double [[F]], [[G]] ; CHECK-NEXT: ret double [[CALL]] ; entry: @@ -91,7 +91,7 @@ define fp128 @test_fp128(fp128 nofpclass(inf) %f, fp128 nofpclass(zero) %g) { ; CHECK-LABEL: define fp128 @test_fp128( ; CHECK-SAME: fp128 nofpclass(inf) [[F:%.*]], fp128 nofpclass(zero) [[G:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call fp128 @fmodl(fp128 [[F]], fp128 [[G]]) +; CHECK-NEXT: [[CALL:%.*]] = frem nnan fp128 [[F]], [[G]] ; CHECK-NEXT: ret fp128 [[CALL]] ; entry: @@ -103,11 +103,11 @@ define float @test_noinf_nozero_dazpreservesign(float nofpclass(inf) %f, float n ; CHECK-LABEL: define float @test_noinf_nozero_dazpreservesign( ; CHECK-SAME: float nofpclass(inf) [[F:%.*]], float nofpclass(zero) [[G:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call nnan float @fmodf(float [[F]], float [[G]]) +; CHECK-NEXT: [[CALL:%.*]] = tail call float @fmodf(float [[F]], float [[G]]) ; CHECK-NEXT: ret float [[CALL]] ; entry: - %call = tail call nnan float @fmodf(float %f, float %g) + %call = tail call float @fmodf(float %f, float %g) ret float %call } @@ -115,7 +115,19 @@ define float @test_noinf_nozero_dazdynamic(float nofpclass(inf) %f, float nofpcl ; CHECK-LABEL: define float @test_noinf_nozero_dazdynamic( ; CHECK-SAME: float nofpclass(inf) [[F:%.*]], float nofpclass(zero) [[G:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call nnan float @fmodf(float [[F]], float [[G]]) +; CHECK-NEXT: [[CALL:%.*]] = tail call float @fmodf(float [[F]], float [[G]]) +; CHECK-NEXT: ret float [[CALL]] +; +entry: + %call = tail call float @fmodf(float %f, float %g) + ret float %call +} 
+ +define float @test_nnan(float %f, float %g) { +; CHECK-LABEL: define float @test_nnan( +; CHECK-SAME: float [[F:%.*]], float [[G:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = frem nnan float [[F]], [[G]] ; CHECK-NEXT: ret float [[CALL]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll b/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll index 57c3abc7b984..9f50265004f0 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll @@ -90,3 +90,109 @@ define <2 x i1> @icmp_ugt_16x2(<2 x i32>) { %d = icmp ugt <2 x i32> %c, <i32 1048575, i32 1048575> ret <2 x i1> %d } + +define i1 @fold_icmp_shl_nuw_c1(i32 %x) { +; CHECK-LABEL: @fold_icmp_shl_nuw_c1( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 61440 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %lshr = lshr i32 %x, 12 + %and = and i32 %lshr, 15 + %shl = shl nuw i32 2, %and + %cmp = icmp ult i32 %shl, 4 + ret i1 %cmp +} + +define i1 @fold_icmp_shl_nuw_c2(i32 %x) { +; CHECK-LABEL: @fold_icmp_shl_nuw_c2( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 2 +; CHECK-NEXT: ret i1 [[CMP]] +; + %shl = shl nuw i32 16, %x + %cmp = icmp ult i32 %shl, 64 + ret i1 %cmp +} + +define i1 @fold_icmp_shl_nuw_c2_non_pow2(i32 %x) { +; CHECK-LABEL: @fold_icmp_shl_nuw_c2_non_pow2( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 2 +; CHECK-NEXT: ret i1 [[CMP]] +; + %shl = shl nuw i32 48, %x + %cmp = icmp ult i32 %shl, 192 + ret i1 %cmp +} + +define i1 @fold_icmp_shl_nuw_c2_div_non_pow2(i32 %x) { +; CHECK-LABEL: @fold_icmp_shl_nuw_c2_div_non_pow2( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 5 +; CHECK-NEXT: ret i1 [[CMP]] +; + %shl = shl nuw i32 2, %x + %cmp = icmp ult i32 %shl, 60 + ret i1 %cmp +} + +define i1 @fold_icmp_shl_nuw_c3(i32 %x) { +; CHECK-LABEL: @fold_icmp_shl_nuw_c3( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], 1 +; CHECK-NEXT: ret i1 [[CMP]] +; + %shl = shl nuw i32 
48, %x + %cmp = icmp uge i32 %shl, 144 + ret i1 %cmp +} + +define i1 @fold_icmp_shl_nuw_c2_indivisible(i32 %x) { +; CHECK-LABEL: @fold_icmp_shl_nuw_c2_indivisible( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 2 +; CHECK-NEXT: ret i1 [[CMP]] +; + %shl = shl nuw i32 16, %x + %cmp = icmp ult i32 %shl, 63 + ret i1 %cmp +} + +; Negative tests + +define i1 @fold_icmp_shl_c2_without_nuw(i32 %x) { +; CHECK-LABEL: @fold_icmp_shl_c2_without_nuw( +; CHECK-NEXT: [[SHL:%.*]] = shl i32 16, [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[SHL]], 64 +; CHECK-NEXT: ret i1 [[CMP]] +; + %shl = shl i32 16, %x + %cmp = icmp ult i32 %shl, 64 + ret i1 %cmp +} + +; Make sure this trivial case is folded by InstSimplify. +define i1 @fold_icmp_shl_nuw_c2_precondition1(i32 %x) { +; CHECK-LABEL: @fold_icmp_shl_nuw_c2_precondition1( +; CHECK-NEXT: ret i1 true +; + %shl = shl nuw i32 0, %x + %cmp = icmp ult i32 %shl, 63 + ret i1 %cmp +} + +; Make sure this trivial case is folded by InstSimplify. +define i1 @fold_icmp_shl_nuw_c2_precondition2(i32 %x) { +; CHECK-LABEL: @fold_icmp_shl_nuw_c2_precondition2( +; CHECK-NEXT: ret i1 false +; + %shl = shl nuw i32 127, %x + %cmp = icmp ult i32 %shl, 63 + ret i1 %cmp +} + +; Make sure we don't crash on this case. 
+define i1 @fold_icmp_shl_nuw_c2_precondition3(i32 %x) { +; CHECK-LABEL: @fold_icmp_shl_nuw_c2_precondition3( +; CHECK-NEXT: ret i1 false +; + %shl = shl nuw i32 1, %x + %cmp = icmp ult i32 %shl, 1 + ret i1 %cmp +} diff --git a/llvm/test/Transforms/InstCombine/phi.ll b/llvm/test/Transforms/InstCombine/phi.ll index 3b1fa3a97d9c..b33ad9a7d339 100644 --- a/llvm/test/Transforms/InstCombine/phi.ll +++ b/llvm/test/Transforms/InstCombine/phi.ll @@ -2742,3 +2742,54 @@ loop.latch: call void @use(i32 %and) br label %loop } + +define void @test_dead_phi_web(i64 %index, i1 %cond) { +; CHECK-LABEL: @test_dead_phi_web( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BB0:%.*]] +; CHECK: BB0: +; CHECK-NEXT: switch i64 [[INDEX:%.*]], label [[BB4:%.*]] [ +; CHECK-NEXT: i64 0, label [[BB1:%.*]] +; CHECK-NEXT: i64 1, label [[BB2:%.*]] +; CHECK-NEXT: i64 2, label [[BB3:%.*]] +; CHECK-NEXT: ] +; CHECK: BB1: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB2]], label [[BB4]] +; CHECK: BB2: +; CHECK-NEXT: br i1 [[COND]], label [[BB3]], label [[BB4]] +; CHECK: BB3: +; CHECK-NEXT: br label [[BB4]] +; CHECK: BB4: +; CHECK-NEXT: br i1 [[COND]], label [[BB0]], label [[BB5:%.*]] +; CHECK: BB5: +; CHECK-NEXT: ret void +; +entry: + br label %BB0 + +BB0: ; preds = %BB4, %entry + %a = phi float [ 0.0, %entry ], [ %x, %BB4 ] + switch i64 %index, label %BB4 [ + i64 0, label %BB1 + i64 1, label %BB2 + i64 2, label %BB3 + ] + +BB1: ; preds = %BB0 + br i1 %cond, label %BB2, label %BB4 + +BB2: ; preds = %BB1, %BB0 + %b = phi float [ 2.0, %BB0 ], [ %a, %BB1 ] + br i1 %cond, label %BB3, label %BB4 + +BB3: ; preds = %BB2, %BB0 + %c = phi float [ 3.0, %BB0 ], [ %b, %BB2 ] + br label %BB4 + +BB4: ; preds = %BB3, %BB2, %BB1, %BB0 + %x = phi float [ %a, %BB0 ], [ %a, %BB1 ], [ %b, %BB2 ], [ %c, %BB3 ] + br i1 %cond, label %BB0, label %BB5 + +BB5: ; preds = %BB4 + ret void +} diff --git a/llvm/test/Transforms/LICM/hoist-deref-load.ll b/llvm/test/Transforms/LICM/hoist-deref-load.ll index 149976ab1874..c498e85ddd6c 
100644 --- a/llvm/test/Transforms/LICM/hoist-deref-load.ll +++ b/llvm/test/Transforms/LICM/hoist-deref-load.ll @@ -420,7 +420,7 @@ for.end: ; preds = %for.inc, %entry define void @test7(ptr noalias %a, ptr %b, ptr %cptr, i32 %n) #0 { ; CHECK-LABEL: @test7( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = load ptr, ptr [[CPTR:%.*]], align 8, !dereferenceable !0, !align !0 +; CHECK-NEXT: [[C:%.*]] = load ptr, ptr [[CPTR:%.*]], align 8, !dereferenceable [[META0:![0-9]+]], !align [[META0]] ; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.preheader: @@ -492,7 +492,7 @@ for.end: ; preds = %for.inc, %entry define void @test8(ptr noalias %a, ptr %b, ptr %cptr, i32 %n) #0 { ; CHECK-LABEL: @test8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = load ptr, ptr [[CPTR:%.*]], align 8, !dereferenceable_or_null !0, !align !0 +; CHECK-NEXT: [[C:%.*]] = load ptr, ptr [[CPTR:%.*]], align 8, !dereferenceable_or_null [[META0]], !align [[META0]] ; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne ptr [[C]], null ; CHECK-NEXT: br i1 [[NOT_NULL]], label [[NOT_NULL:%.*]], label [[FOR_END:%.*]] ; CHECK: not.null: @@ -562,7 +562,7 @@ for.end: ; preds = %for.inc, %entry, %n define void @test9(ptr noalias %a, ptr %b, ptr %cptr, i32 %n) #0 { ; CHECK-LABEL: @test9( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = load ptr, ptr [[CPTR:%.*]], align 8, !dereferenceable_or_null !0 +; CHECK-NEXT: [[C:%.*]] = load ptr, ptr [[CPTR:%.*]], align 8, !dereferenceable_or_null [[META0]] ; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.preheader: @@ -693,7 +693,7 @@ define void @test11(ptr noalias %a, ptr %b, ptr dereferenceable(8) %cptr, i32 %n ; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: 
for.body.preheader: -; CHECK-NEXT: [[C:%.*]] = load ptr, ptr [[CPTR:%.*]], align 8, !dereferenceable !0 +; CHECK-NEXT: [[C:%.*]] = load ptr, ptr [[CPTR:%.*]], align 8, !dereferenceable [[META0]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -1164,5 +1164,78 @@ for.end: ; preds = %for.inc, %entry ret void } +declare void @use(i64) + +define void @licm_deref_no_hoist(i1 %c1, i1 %c2, ptr align 8 dereferenceable(8) %p1) { +; CHECK-LABEL: @licm_deref_no_hoist( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P1:%.*]], align 8, !align [[META1:![0-9]+]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: br i1 [[C1:%.*]], label [[IF:%.*]], label [[LOOP_LATCH:%.*]] +; CHECK: if: +; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P2]], align 8 +; CHECK-NEXT: call void @use(i64 [[V]]) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: br i1 [[C2:%.*]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + br i1 %c1, label %if, label %loop.latch + +if: + %p2 = load ptr, ptr %p1, align 8, !dereferenceable !1, !align !1 + %v = load i64, ptr %p2, align 8 + call void @use(i64 %v) memory(none) + br label %loop.latch + +loop.latch: + br i1 %c2, label %loop, label %exit + +exit: + ret void +} + +define void @licm_deref_hoist(i1 %c1, i1 %c2, ptr align 8 dereferenceable(8) %p1) { +; CHECK-LABEL: @licm_deref_hoist( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P1:%.*]], align 8, !dereferenceable [[META1]], !align [[META1]] +; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P2]], align 8 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: br i1 [[C1:%.*]], label [[IF:%.*]], label [[LOOP_LATCH:%.*]] +; CHECK: if: +; CHECK-NEXT: call void @use(i64 [[V]]) #[[ATTR1]] +; CHECK-NEXT: br label [[LOOP_LATCH]] 
+; CHECK: loop.latch: +; CHECK-NEXT: br i1 [[C2:%.*]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %p2 = load ptr, ptr %p1, align 8, !dereferenceable !1, !align !1 + br label %loop + +loop: + br i1 %c1, label %if, label %loop.latch + +if: + %v = load i64, ptr %p2, align 8 + call void @use(i64 %v) memory(none) + br label %loop.latch + +loop.latch: + br i1 %c2, label %loop, label %exit + +exit: + ret void +} + attributes #0 = { nounwind uwtable nofree nosync } !0 = !{i64 4} +!1 = !{i64 8} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll index a1f6ba487e84..6ec9eb849dd5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -55,8 +55,8 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START_1:%.*]], i64 [[TMP4]] ; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[START_2:%.*]], i64 [[N_VEC]] -; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] @@ -70,23 +70,23 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP12]] -; CHECK-NEXT: [[VECTOR_GEP:%.*]] = 
mul <vscale x 2 x i64> [[TMP13]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]] +; CHECK-NEXT: [[TMP14:%.*]] = mul <vscale x 2 x i64> [[TMP13]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP14]] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, <vscale x 2 x ptr> [[TMP14]], i64 1 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: store <vscale x 2 x ptr> [[TMP15]], ptr [[TMP16]], align 8 -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 2 x ptr> [[TMP14]], i32 0 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP18]], align 1 -; CHECK-NEXT: [[TMP19:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer) -; CHECK-NEXT: store <vscale x 2 x i8> [[TMP19]], ptr [[TMP18]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]] +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, <vscale x 2 x ptr> [[VECTOR_GEP]], i64 1 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: store <vscale x 2 x ptr> 
[[TMP16]], ptr [[TMP17]], align 8 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <vscale x 2 x ptr> [[VECTOR_GEP]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP18]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP19]], align 1 +; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: store <vscale x 2 x i8> [[TMP20]], ptr [[TMP19]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -148,30 +148,30 @@ define void @pointer_induction(ptr noalias %start, i64 %N) { ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[N_VEC]] -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], 
[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP6]], 0 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP10]] -; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP11]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <vscale x 2 x ptr> [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP14]], align 1 -; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer) -; CHECK-NEXT: store <vscale x 2 x i8> [[TMP15]], ptr [[TMP14]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP17]] -; CHECK-NEXT: [[PTR_IND]] = 
getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() +; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = mul <vscale x 2 x i64> [[TMP13]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 2 x ptr> [[VECTOR_GEP]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP15]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: store <vscale x 2 x i8> [[TMP17]], ptr [[TMP16]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP6]] +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll index 123b3cf3df14..bfb5cf8d6662 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll @@ -243,14 +243,14 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 { ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 3 ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> 
@llvm.stepvector.nxv2i64() -; CHECK-NEXT: [[VECTOR_GEP:%.*]] = shl <vscale x 2 x i64> [[TMP9]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]] +; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 2 x i64> [[TMP9]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP10]] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <vscale x 2 x ptr> [[TMP10]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <vscale x 2 x ptr> [[VECTOR_GEP]], i64 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP11]], align 8 ; CHECK-NEXT: [[TMP12]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-NEXT: store <vscale x 2 x ptr> [[TMP10]], ptr [[NEXT_GEP]], align 8 +; CHECK-NEXT: store <vscale x 2 x ptr> [[VECTOR_GEP]], ptr [[NEXT_GEP]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -318,10 +318,10 @@ define void @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(ptr % ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() -; CHECK-NEXT: [[VECTOR_GEP:%.*]] = shl <vscale x 2 x i64> [[TMP4]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, 
<vscale x 2 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <vscale x 2 x ptr> [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 2 x ptr> [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = shl <vscale x 2 x i64> [[TMP4]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <vscale x 2 x ptr> [[VECTOR_GEP]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 2 x ptr> [[VECTOR_GEP]], i64 0 ; CHECK-NEXT: call void @llvm.masked.store.nxv2i16.p0(<vscale x 2 x i16> zeroinitializer, ptr [[TMP7]], i32 2, <vscale x 2 x i1> [[TMP6]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 837e39d12359..99b8cb7ae94b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -181,15 +181,15 @@ define void @single_constant_stride_ptr_iv(ptr %p) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT]], [[TMP14]] -; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 4 x i64> [[TMP15]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 8, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) -; 
CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[VECTOR_GEP]] -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 4 x ptr> [[TMP16]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = mul <vscale x 4 x i64> [[TMP15]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 8, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 4 x ptr> [[VECTOR_GEP]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 0 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP18]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0 ; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 4 x i32> [[TMP19]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x ptr> [[TMP16]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x ptr> [[VECTOR_GEP]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP21:%.*]] = 
icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -757,8 +757,8 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[TMP20:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT]], [[TMP19]] ; STRIDED-NEXT: [[DOTSPLATINSERT9:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT10:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT9]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer -; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 4 x i64> [[TMP20]], [[DOTSPLAT10]] -; STRIDED-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[VECTOR_GEP]] +; STRIDED-NEXT: [[TMP21:%.*]] = mul <vscale x 4 x i64> [[TMP20]], [[DOTSPLAT10]] +; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[TMP21]] ; STRIDED-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() ; STRIDED-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4 ; STRIDED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 1 @@ -768,11 +768,11 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT13]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer ; STRIDED-NEXT: [[TMP27:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() ; STRIDED-NEXT: [[TMP28:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT14]], [[TMP27]] -; STRIDED-NEXT: [[VECTOR_GEP17:%.*]] = mul <vscale x 4 x i64> [[TMP28]], [[DOTSPLAT10]] -; STRIDED-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], <vscale x 4 x i64> [[VECTOR_GEP17]] -; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP21]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison), !alias.scope [[META15:![0-9]+]] +; 
STRIDED-NEXT: [[TMP29:%.*]] = mul <vscale x 4 x i64> [[TMP28]], [[DOTSPLAT10]] +; STRIDED-NEXT: [[VECTOR_GEP17:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], <vscale x 4 x i64> [[TMP29]] +; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[VECTOR_GEP]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison), !alias.scope [[META15:![0-9]+]] ; STRIDED-NEXT: [[TMP30:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP30]], <vscale x 4 x ptr> [[TMP29]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)), !alias.scope [[META18:![0-9]+]], !noalias [[META15]] +; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP30]], <vscale x 4 x ptr> [[VECTOR_GEP17]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)), !alias.scope [[META18:![0-9]+]], !noalias [[META15]] ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] ; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP17]] ; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP25]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll index 04b3ba52cbef..6dfe5b608199 100644 --- 
a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll @@ -31,7 +31,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> ; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]> -; IF-EVL-NEXT: WIDEN-VP ir<[[ADD:%.+]]> = add nsw ir<[[LD2]]>, ir<[[LD1]]>, vp<[[EVL]]> +; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = vp.add nsw ir<[[LD2]]>, ir<[[LD1]]>, vp<[[EVL]]> ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]> ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]> diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index 35037968160c..41d9c4d84202 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -142,22 +142,22 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 1, i64 2, i64 3> +; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 1, i64 2, i64 3> ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP1]], i64 1 -; CHECK-NEXT: [[TMP4:%.*]] 
= getelementptr ptr, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: store <4 x ptr> [[TMP3]], ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x ptr> [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1 -; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1> -; CHECK-NEXT: store <4 x i8> [[TMP7]], ptr [[TMP6]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, <4 x ptr> [[VECTOR_GEP]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: store <4 x ptr> [[TMP2]], ptr [[TMP3]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1> +; CHECK-NEXT: store <4 x i8> [[TMP6]], ptr [[TMP5]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 4 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -237,13 +237,13 @@ define void @non_constant_vector_expansion(i32 %0, ptr %call) { ; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP1]], 4 ; STRIDED-NEXT: 
[[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = mul <4 x i64> <i64 0, i64 1, i64 2, i64 3>, [[DOTSPLAT]] -; STRIDED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> [[VECTOR_GEP]] +; STRIDED-NEXT: [[TMP4:%.*]] = mul <4 x i64> <i64 0, i64 1, i64 2, i64 3>, [[DOTSPLAT]] +; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> [[TMP4]] ; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 ; STRIDED-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], 0 ; STRIDED-NEXT: [[TMP6:%.*]] = getelementptr ptr, ptr [[CALL:%.*]], i32 [[TMP5]] ; STRIDED-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[TMP6]], i32 0 -; STRIDED-NEXT: store <4 x ptr> [[TMP4]], ptr [[TMP7]], align 4 +; STRIDED-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP7]], align 4 ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP3]] ; STRIDED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 diff --git a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll index e58c99dc4bc5..43eeefb77449 100644 --- a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll +++ b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll @@ -102,14 +102,14 @@ define void @integer_induction_wraps_scev_predicate_known(i32 %x, ptr %call, ptr ; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP0]], 4 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <4 x i64> <i64 0, i64 1, i64 2, i64 3>, [[DOTSPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = 
getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> [[VECTOR_GEP]] +; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i64> <i64 0, i64 1, i64 2, i64 3>, [[DOTSPLAT]] +; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> [[TMP3]] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 30, [[DOTCAST]] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr ptr, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store <4 x ptr> [[TMP3]], ptr [[TMP6]], align 4 +; CHECK-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4294967264 diff --git a/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll b/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll index c4602e72ecbc..aae1ab032f36 100644 --- a/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll +++ b/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll @@ -918,6 +918,280 @@ bb5: ; preds = %bb3, %bb ret i32 %i7 } +declare void @side.effect() +declare i8 @get.i8() + +define i8 @udiv_by_zero(i8 %x, i8 %i, i8 %v) { +; CHECK-LABEL: @udiv_by_zero( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i8 [[I:%.*]], label [[SW_DEFAULT:%.*]] [ +; CHECK-NEXT: i8 9, label [[SW_BB2:%.*]] +; CHECK-NEXT: i8 2, label [[RETURN:%.*]] +; CHECK-NEXT: ] +; CHECK: sw.bb2: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: sw.default: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[Y:%.*]] = phi i8 [ 9, [[SW_BB2]] ], [ [[V:%.*]], [[SW_DEFAULT]] ], [ 2, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[R:%.*]] = udiv i8 [[X:%.*]], [[Y]] +; CHECK-NEXT: ret i8 [[R]] +; +entry: + switch i8 %i, label %sw.default [ + i8 0, label %sw.bb0 + i8 2, label %sw.bb1 + i8 9, label %sw.bb2 + 
] + +sw.bb0: + br label %return + +sw.bb1: + br label %return +sw.bb2: + br label %return +sw.default: + br label %return + +return: + %y = phi i8 [ 0, %sw.bb0 ], [ 2, %sw.bb1 ], [ 9, %sw.bb2 ], [ %v, %sw.default ] + %r = udiv i8 %x, %y + ret i8 %r +} + +define i8 @urem_by_zero(i8 %x, i8 %i, i8 %v) { +; CHECK-LABEL: @urem_by_zero( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i8 [[I:%.*]], label [[SW_DEFAULT:%.*]] [ +; CHECK-NEXT: i8 0, label [[RETURN:%.*]] +; CHECK-NEXT: i8 2, label [[SW_BB1:%.*]] +; CHECK-NEXT: i8 9, label [[SW_BB2:%.*]] +; CHECK-NEXT: ] +; CHECK: sw.bb1: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: sw.bb2: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: sw.default: +; CHECK-NEXT: unreachable +; CHECK: return: +; CHECK-NEXT: [[Y:%.*]] = phi i8 [ 2, [[SW_BB1]] ], [ 9, [[SW_BB2]] ], [ [[V:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[R:%.*]] = urem i8 [[X:%.*]], [[Y]] +; CHECK-NEXT: ret i8 [[R]] +; +entry: + switch i8 %i, label %sw.default [ + i8 0, label %sw.bb0 + i8 2, label %sw.bb1 + i8 9, label %sw.bb2 + ] + +sw.bb0: + br label %return + +sw.bb1: + br label %return +sw.bb2: + br label %return +sw.default: + br label %return + +return: + %y = phi i8 [ %v, %sw.bb0 ], [ 2, %sw.bb1 ], [ 9, %sw.bb2 ], [ 0, %sw.default ] + %r = urem i8 %x, %y + ret i8 %r +} + +define i8 @udiv_of_zero_okay(i8 %x, i8 %i, i8 %v) { +; CHECK-LABEL: @udiv_of_zero_okay( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i8 [[I:%.*]], label [[SW_DEFAULT:%.*]] [ +; CHECK-NEXT: i8 0, label [[RETURN:%.*]] +; CHECK-NEXT: i8 2, label [[SW_BB1:%.*]] +; CHECK-NEXT: i8 9, label [[SW_BB2:%.*]] +; CHECK-NEXT: ] +; CHECK: sw.bb1: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: sw.bb2: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: sw.default: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[Y:%.*]] = phi i8 [ 2, [[SW_BB1]] ], [ 9, [[SW_BB2]] ], [ [[V:%.*]], [[SW_DEFAULT]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[R:%.*]] = udiv i8 [[Y]], [[X:%.*]] +; CHECK-NEXT: ret i8 [[R]] +; +entry: + 
switch i8 %i, label %sw.default [ + i8 0, label %sw.bb0 + i8 2, label %sw.bb1 + i8 9, label %sw.bb2 + ] + +sw.bb0: + br label %return + +sw.bb1: + br label %return +sw.bb2: + br label %return +sw.default: + br label %return + +return: + %y = phi i8 [ 0, %sw.bb0 ], [ 2, %sw.bb1 ], [ 9, %sw.bb2 ], [ %v, %sw.default ] + %r = udiv i8 %y, %x + ret i8 %r +} + +define i8 @srem_by_zero(i8 %x, i8 %i) { +; CHECK-LABEL: @srem_by_zero( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I:%.*]], 9 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @side.effect() +; CHECK-NEXT: unreachable +; CHECK: if.else: +; CHECK-NEXT: [[V:%.*]] = call i8 @get.i8() +; CHECK-NEXT: [[R:%.*]] = srem i8 [[X:%.*]], [[V]] +; CHECK-NEXT: ret i8 [[R]] +; +entry: + %cmp = icmp ult i8 %i, 9 + br i1 %cmp, label %if.then, label %if.else + +if.then: + call void @side.effect() + br label %if.end + +if.else: + %v = call i8 @get.i8() + br label %if.end + +if.end: + %y = phi i8 [ 0, %if.then ], [ %v, %if.else ] + %r = srem i8 %x, %y + ret i8 %r +} + +define i8 @srem_no_overflow_okay(i8 %i) { +; CHECK-LABEL: @srem_no_overflow_okay( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I:%.*]], 9 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @side.effect() +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[V:%.*]] = call i8 @get.i8() +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[Y:%.*]] = phi i8 [ -1, [[IF_THEN]] ], [ [[V]], [[IF_ELSE]] ] +; CHECK-NEXT: [[R:%.*]] = srem i8 [[Y]], -128 +; CHECK-NEXT: ret i8 [[R]] +; +entry: + %cmp = icmp ult i8 %i, 9 + br i1 %cmp, label %if.then, label %if.else + +if.then: + call void @side.effect() + br label %if.end + +if.else: + %v = call i8 @get.i8() + br label %if.end + +if.end: + %y = phi i8 [ -1, %if.then ], [ %v, %if.else ] + %r = srem i8 %y, 128 + ret i8 %r 
+} + +define i8 @sdiv_overflow_ub(i8 %i) { +; CHECK-LABEL: @sdiv_overflow_ub( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i8 [[I:%.*]], label [[SW_DEFAULT:%.*]] [ +; CHECK-NEXT: i8 0, label [[RETURN:%.*]] +; CHECK-NEXT: i8 2, label [[SW_BB1:%.*]] +; CHECK-NEXT: i8 9, label [[SW_BB2:%.*]] +; CHECK-NEXT: ] +; CHECK: sw.bb1: +; CHECK-NEXT: [[V:%.*]] = call i8 @get.i8() +; CHECK-NEXT: br label [[RETURN]] +; CHECK: sw.bb2: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: sw.default: +; CHECK-NEXT: unreachable +; CHECK: return: +; CHECK-NEXT: [[Y:%.*]] = phi i8 [ [[V]], [[SW_BB1]] ], [ -1, [[SW_BB2]] ], [ 4, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[R:%.*]] = sdiv i8 -128, [[Y]] +; CHECK-NEXT: ret i8 [[R]] +; +entry: + switch i8 %i, label %sw.default [ + i8 0, label %sw.bb0 + i8 2, label %sw.bb1 + i8 9, label %sw.bb2 + ] + +sw.bb0: + br label %return +sw.bb1: + %v = call i8 @get.i8() + br label %return +sw.bb2: + br label %return +sw.default: + unreachable + +return: + %y = phi i8 [ 4, %sw.bb0 ], [ %v, %sw.bb1 ], [ -1, %sw.bb2 ] + %r = sdiv i8 128, %y + ret i8 %r +} + +define i8 @sdiv_overflow_ub_2x(i8 %i) { +; CHECK-LABEL: @sdiv_overflow_ub_2x( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i8 [[I:%.*]], label [[SW_DEFAULT:%.*]] [ +; CHECK-NEXT: i8 9, label [[RETURN:%.*]] +; CHECK-NEXT: i8 2, label [[SW_BB1:%.*]] +; CHECK-NEXT: ] +; CHECK: sw.bb1: +; CHECK-NEXT: [[V:%.*]] = call i8 @get.i8() +; CHECK-NEXT: br label [[RETURN]] +; CHECK: sw.default: +; CHECK-NEXT: unreachable +; CHECK: return: +; CHECK-NEXT: [[Y:%.*]] = phi i8 [ [[V]], [[SW_BB1]] ], [ -1, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[R:%.*]] = sdiv i8 -128, [[Y]] +; CHECK-NEXT: ret i8 [[R]] +; +entry: + switch i8 %i, label %sw.default [ + i8 0, label %sw.bb0 + i8 2, label %sw.bb1 + i8 9, label %sw.bb2 + ] + +sw.bb0: + br label %return +sw.bb1: + %v = call i8 @get.i8() + br label %return +sw.bb2: + br label %return +sw.default: + unreachable + +return: + %y = phi i8 [ 0, %sw.bb0 ], [ %v, %sw.bb1 ], [ -1, %sw.bb2 ] + %r = sdiv i8 128, 
%y + ret i8 %r +} + attributes #0 = { null_pointer_is_valid } ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } diff --git a/llvm/test/Transforms/SimplifyCFG/speculate-derefable-load.ll b/llvm/test/Transforms/SimplifyCFG/speculate-derefable-load.ll new file mode 100644 index 000000000000..9e3f333018e6 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/speculate-derefable-load.ll @@ -0,0 +1,198 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=simplifycfg < %s | FileCheck %s + +define i64 @align_deref_align(i1 %c, ptr %p) { +; CHECK-LABEL: define i64 @align_deref_align( +; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 8), "align"(ptr [[P]], i64 8) ] +; CHECK-NEXT: br i1 [[C]], label %[[IF:.*]], label %[[EXIT:.*]] +; CHECK: [[IF]]: +; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P]], align 8 +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[V]], %[[IF]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: ret i64 [[RES]] +; +entry: + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 8), "align"(ptr %p, i64 8) ] + br i1 %c, label %if, label %exit + +if: + %v = load i64, ptr %p, align 8 + br label %exit + +exit: + %res = phi i64 [ %v, %if ], [ 0, %entry ] + ret i64 %res +} + +define i64 @assume_deref_align2(i1 %c1, i32 %x, ptr %p) { +; CHECK-LABEL: define i64 @assume_deref_align2( +; CHECK-SAME: i1 [[C1:%.*]], i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 8), "align"(ptr [[P]], i64 8) ] +; CHECK-NEXT: br i1 [[C1]], label %[[IF1:.*]], label %[[EXIT:.*]] +; CHECK: [[IF1]]: +; CHECK-NEXT: [[C2:%.*]] = icmp ugt i32 [[X]], 10 +; CHECK-NEXT: br i1 [[C2]], label %[[IF2:.*]], label %[[EXIT]] +; 
CHECK: [[IF2]]: +; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P]], align 8 +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[V]], %[[IF2]] ], [ 1, %[[IF1]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: ret i64 [[RES]] +; +entry: + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 8), "align"(ptr %p, i64 8) ] + br i1 %c1, label %if1, label %exit + +if1: + %c2 = icmp ugt i32 %x, 10 + br i1 %c2, label %if2, label %exit + +if2: + %v = load i64, ptr %p, align 8 + br label %exit + +exit: + %res = phi i64 [ %v, %if2 ], [ 1, %if1 ], [ 0, %entry ] + ret i64 %res +} + +define i64 @assume_deref_align_not_dominating(i1 %c, ptr %p) { +; CHECK-LABEL: define i64 @assume_deref_align_not_dominating( +; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 [[C]], label %[[IF:.*]], label %[[EXIT:.*]] +; CHECK: [[IF]]: +; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P]], align 8 +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[V]], %[[IF]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 8), "align"(ptr [[P]], i64 8) ] +; CHECK-NEXT: ret i64 [[RES]] +; +entry: + br i1 %c, label %if, label %exit + +if: + %v = load i64, ptr %p, align 8 + br label %exit + +exit: + %res = phi i64 [ %v, %if ], [ 0, %entry ] + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 8), "align"(ptr %p, i64 8) ] + ret i64 %res +} + +; FIXME: This is a miscompile. 
+define i64 @deref_no_hoist(i1 %c, ptr align 8 dereferenceable(8) %p1) { +; CHECK-LABEL: define i64 @deref_no_hoist( +; CHECK-SAME: i1 [[C:%.*]], ptr align 8 dereferenceable(8) [[P1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P1]], align 8, !align [[META0:![0-9]+]] +; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P2]], align 8 +; CHECK-NEXT: [[RES:%.*]] = select i1 [[C]], i64 [[V]], i64 0 +; CHECK-NEXT: ret i64 [[RES]] +; +entry: + br i1 %c, label %if, label %exit + +if: + %p2 = load ptr, ptr %p1, align 8, !dereferenceable !0, !align !0 + %v = load i64, ptr %p2, align 8 + br label %exit + +exit: + %res = phi i64 [ %v, %if ], [ 0, %entry ] + ret i64 %res +} + +define i64 @deref_hoist(i1 %c, ptr align 8 dereferenceable(8) %p1) { +; CHECK-LABEL: define i64 @deref_hoist( +; CHECK-SAME: i1 [[C:%.*]], ptr align 8 dereferenceable(8) [[P1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P1]], align 8, !dereferenceable [[META0]], !align [[META0]] +; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P2]], align 8 +; CHECK-NEXT: [[RES:%.*]] = select i1 [[C]], i64 [[V]], i64 0 +; CHECK-NEXT: ret i64 [[RES]] +; +entry: + %p2 = load ptr, ptr %p1, align 8, !dereferenceable !0, !align !0 + br i1 %c, label %if, label %exit + +if: + %v = load i64, ptr %p2, align 8 + br label %exit + +exit: + %res = phi i64 [ %v, %if ], [ 0, %entry ] + ret i64 %res +} + +define i64 @deref_no_hoist2(i1 %c1, i32 %x, ptr align 8 dereferenceable(8) %p1) { +; CHECK-LABEL: define i64 @deref_no_hoist2( +; CHECK-SAME: i1 [[C1:%.*]], i32 [[X:%.*]], ptr align 8 dereferenceable(8) [[P1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 [[C1]], label %[[IF1:.*]], label %[[EXIT:.*]] +; CHECK: [[IF1]]: +; CHECK-NEXT: [[C2:%.*]] = icmp ugt i32 [[X]], 10 +; CHECK-NEXT: br i1 [[C2]], label %[[IF2:.*]], label %[[EXIT]] +; CHECK: [[IF2]]: +; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P1]], align 8, !dereferenceable [[META0]], !align [[META0]] +; CHECK-NEXT: 
[[V:%.*]] = load i64, ptr [[P2]], align 8 +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[V]], %[[IF2]] ], [ 1, %[[IF1]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: ret i64 [[RES]] +; +entry: + br i1 %c1, label %if1, label %exit + +if1: + %c2 = icmp ugt i32 %x, 10 + br i1 %c2, label %if2, label %exit + +if2: + %p2 = load ptr, ptr %p1, align 8, !dereferenceable !0, !align !0 + %v = load i64, ptr %p2, align 8 + br label %exit + +exit: + %res = phi i64 [ %v, %if2 ], [ 1, %if1 ], [ 0, %entry ] + ret i64 %res +} + +define i64 @deref_hoist2(i1 %c1, i32 %x, ptr align 8 dereferenceable(8) %p1) { +; CHECK-LABEL: define i64 @deref_hoist2( +; CHECK-SAME: i1 [[C1:%.*]], i32 [[X:%.*]], ptr align 8 dereferenceable(8) [[P1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P1]], align 8, !dereferenceable [[META0]], !align [[META0]] +; CHECK-NEXT: [[C2:%.*]] = icmp ugt i32 [[X]], 10 +; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P2]], align 8 +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C2]], i64 [[V]], i64 1 +; CHECK-NEXT: [[RES:%.*]] = select i1 [[C1]], i64 [[SPEC_SELECT]], i64 0 +; CHECK-NEXT: ret i64 [[RES]] +; +entry: + %p2 = load ptr, ptr %p1, align 8, !dereferenceable !0, !align !0 + br i1 %c1, label %if1, label %exit + +if1: + %c2 = icmp ugt i32 %x, 10 + br i1 %c2, label %if2, label %exit + +if2: + %v = load i64, ptr %p2, align 8 + br label %exit + +exit: + %res = phi i64 [ %v, %if2 ], [ 1, %if1 ], [ 0, %entry ] + ret i64 %res +} + +!0 = !{i64 8} +;. +; CHECK: [[META0]] = !{i64 8} +;. 
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s index 0c6ccc1face9..5ffaf9138d48 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s @@ -3298,28 +3298,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3004 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 0.67 -# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: uOps Per Cycle: 3.94 +# CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 3.8 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . . . . . ldr b1, [x27], #254 -# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ldr h1, [x27], #254 -# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===========eeeeeeER . . . ldr s1, [x27], #254 -# CHECK-NEXT: [0,5] .D=================eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=================eeeeeeER . . ldr d1, [x27], #254 -# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D======================eeeeeeER. ldr q1, [x27], #254 -# CHECK-NEXT: [0,9] . D============================eER add x0, x27, #1 +# CHECK: [0,0] DeeeeeeER . . ldr b1, [x27], #254 +# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeER. . ldr h1, [x27], #254 +# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeER . ldr s1, [x27], #254 +# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeER. ldr d1, [x27], #254 +# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeER ldr q1, [x27], #254 +# CHECK-NEXT: [0,9] . 
D===eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3329,16 +3329,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27], #254 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ldr h1, [x27], #254 -# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 12.0 0.0 0.0 ldr s1, [x27], #254 -# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ldr d1, [x27], #254 -# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 23.0 0.0 0.0 ldr q1, [x27], #254 -# CHECK-NEXT: 9. 1 29.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 15.2 0.1 0.0 <total> +# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr h1, [x27], #254 +# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ldr s1, [x27], #254 +# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldr d1, [x27], #254 +# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ldr q1, [x27], #254 +# CHECK-NEXT: 9. 
1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 2.0 <total> # CHECK: [47] Code Region - G48 diff --git a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml index 47aeb0ad8fde..26efb2bc97cd 100644 --- a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml +++ b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml @@ -34,7 +34,7 @@ # # CHECK: << Total TLI yes SDK no: 18 # CHECK: >> Total TLI no SDK yes: 0 -# CHECK: == Total TLI yes SDK yes: 250 +# CHECK: == Total TLI yes SDK yes: 253 # # WRONG_DETAIL: << TLI yes SDK no : '_ZdaPv' aka operator delete[](void*) # WRONG_DETAIL: >> TLI no SDK yes: '_ZdaPvj' aka operator delete[](void*, unsigned int) @@ -48,14 +48,14 @@ # WRONG_DETAIL: << TLI yes SDK no : 'fminimum_numl' # WRONG_SUMMARY: << Total TLI yes SDK no: 19{{$}} # WRONG_SUMMARY: >> Total TLI no SDK yes: 1{{$}} -# WRONG_SUMMARY: == Total TLI yes SDK yes: 249 +# WRONG_SUMMARY: == Total TLI yes SDK yes: 252 # ## The -COUNT suffix doesn't care if there are too many matches, so check ## the exact count first; the two directives should add up to that. ## Yes, this means additions to TLI will fail this test, but the argument ## to -COUNT can't be an expression. 
-# AVAIL: TLI knows 501 symbols, 268 available -# AVAIL-COUNT-268: {{^}} available +# AVAIL: TLI knows 504 symbols, 271 available +# AVAIL-COUNT-271: {{^}} available # AVAIL-NOT: {{^}} available # UNAVAIL-COUNT-233: not available # UNAVAIL-NOT: not available @@ -862,6 +862,18 @@ DynamicSymbols: Type: STT_FUNC Section: .text Binding: STB_GLOBAL + - Name: sincos + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + - Name: sincosf + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + - Name: sincosl + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL - Name: sinh Type: STT_FUNC Section: .text diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp index c081c44ed35d..ac8ccc03399e 100644 --- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp +++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp @@ -339,6 +339,9 @@ TEST_F(TargetLibraryInfoTest, ValidProto) { "declare float @sinhf(float)\n" "declare x86_fp80 @sinhl(x86_fp80)\n" "declare x86_fp80 @sinl(x86_fp80)\n" + "declare void @sincos(double, ptr, ptr)\n" + "declare void @sincosf(float, ptr, ptr)\n" + "declare void @sincosl(x86_fp80, ptr, ptr)\n" "declare i32 @snprintf(i8*, i64, i8*, ...)\n" "declare i32 @sprintf(i8*, i8*, ...)\n" "declare double @sqrt(double)\n" diff --git a/llvm/unittests/SandboxIR/PassTest.cpp b/llvm/unittests/SandboxIR/PassTest.cpp index ed226d576558..2eaf369caf08 100644 --- a/llvm/unittests/SandboxIR/PassTest.cpp +++ b/llvm/unittests/SandboxIR/PassTest.cpp @@ -179,7 +179,7 @@ TEST_F(PassTest, ParsePassPipeline) { Registry.registerPass(std::make_unique<TestPass1>()); Registry.registerPass(std::make_unique<TestPass2>()); - auto &FPM = + [[maybe_unused]] auto &FPM = Registry.parseAndCreatePassPipeline("test-pass1,test-pass2,test-pass1"); #ifndef NDEBUG std::string Buff; diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp index 5ded063ef6f7..8807716a5273 100644 --- 
a/llvm/unittests/SandboxIR/SandboxIRTest.cpp +++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp @@ -1051,6 +1051,72 @@ define void @foo() { EXPECT_EQ(GV0->getCodeModel(), LLVMGV0->getCodeModel()); } +TEST_F(SandboxIRTest, GlobalAlias) { + parseIR(C, R"IR( +@alias0 = dso_local alias void(), ptr @foo +@alias1 = dso_local alias void(), ptr @foo +declare void @bar(); +define void @foo() { + call void @alias0() + call void @alias1() + call void @bar() + ret void +} +)IR"); + Function &LLVMF = *M->getFunction("foo"); + auto *LLVMBB = &*LLVMF.begin(); + auto LLVMIt = LLVMBB->begin(); + auto *LLVMCall0 = cast<llvm::CallInst>(&*LLVMIt++); + auto *LLVMAlias0 = cast<llvm::GlobalAlias>(LLVMCall0->getCalledOperand()); + sandboxir::Context Ctx(C); + + auto &F = *Ctx.createFunction(&LLVMF); + auto *BB = &*F.begin(); + auto It = BB->begin(); + auto *Call0 = cast<sandboxir::CallInst>(&*It++); + auto *Call1 = cast<sandboxir::CallInst>(&*It++); + auto *CallBar = cast<sandboxir::CallInst>(&*It++); + auto *CalleeBar = cast<sandboxir::Constant>(CallBar->getCalledOperand()); + // Check classof(), creation. + auto *Alias0 = cast<sandboxir::GlobalAlias>(Call0->getCalledOperand()); + auto *Alias1 = cast<sandboxir::GlobalAlias>(Call1->getCalledOperand()); + // Check getIterator(). + { + auto It0 = Alias0->getIterator(); + auto It1 = Alias1->getIterator(); + EXPECT_EQ(&*It0, Alias0); + EXPECT_EQ(&*It1, Alias1); + EXPECT_EQ(std::next(It0), It1); + EXPECT_EQ(std::prev(It1), It0); + EXPECT_EQ(&*std::next(It0), Alias1); + EXPECT_EQ(&*std::prev(It1), Alias0); + } + // Check getReverseIterator(). + { + auto RevIt0 = Alias0->getReverseIterator(); + auto RevIt1 = Alias1->getReverseIterator(); + EXPECT_EQ(&*RevIt0, Alias0); + EXPECT_EQ(&*RevIt1, Alias1); + EXPECT_EQ(std::prev(RevIt0), RevIt1); + EXPECT_EQ(std::next(RevIt1), RevIt0); + EXPECT_EQ(&*std::prev(RevIt0), Alias1); + EXPECT_EQ(&*std::next(RevIt1), Alias0); + } + // Check getAliasee(). 
+ EXPECT_EQ(Alias0->getAliasee(), Ctx.getValue(LLVMAlias0->getAliasee())); + // Check setAliasee(). + auto *OrigAliasee = Alias0->getAliasee(); + auto *NewAliasee = CalleeBar; + EXPECT_NE(NewAliasee, OrigAliasee); + Alias0->setAliasee(NewAliasee); + EXPECT_EQ(Alias0->getAliasee(), NewAliasee); + Alias0->setAliasee(OrigAliasee); + EXPECT_EQ(Alias0->getAliasee(), OrigAliasee); + // Check getAliaseeObject(). + EXPECT_EQ(Alias0->getAliaseeObject(), + Ctx.getValue(LLVMAlias0->getAliaseeObject())); +} + TEST_F(SandboxIRTest, BlockAddress) { parseIR(C, R"IR( define void @foo(ptr %ptr) { @@ -1509,12 +1575,16 @@ bb1: for (sandboxir::Instruction &I : BB0) { EXPECT_EQ(&I, Ctx.getValue(LLVMI)); LLVMI = LLVMI->getNextNode(); + // Check getNodeParent(). + EXPECT_EQ(I.getIterator().getNodeParent(), &BB0); } LLVMI = &*LLVMBB1->begin(); for (sandboxir::Instruction &I : BB1) { EXPECT_EQ(&I, Ctx.getValue(LLVMI)); LLVMI = LLVMI->getNextNode(); } + // Check NodeParent() for BB::end(). + EXPECT_EQ(BB0.end().getNodeParent(), &BB0); // Check BB.getTerminator() EXPECT_EQ(BB0.getTerminator(), Ctx.getValue(LLVMBB0->getTerminator())); diff --git a/llvm/unittests/SandboxIR/TrackerTest.cpp b/llvm/unittests/SandboxIR/TrackerTest.cpp index f46e16e626ba..da5416395ec4 100644 --- a/llvm/unittests/SandboxIR/TrackerTest.cpp +++ b/llvm/unittests/SandboxIR/TrackerTest.cpp @@ -1638,6 +1638,37 @@ define void @foo() { EXPECT_EQ(GV0->isExternallyInitialized(), OrigIsExtInit); } +TEST_F(TrackerTest, GlobalAliasSetters) { + parseIR(C, R"IR( +@alias = dso_local alias void(), ptr @foo +declare void @bar(); +define void @foo() { + call void @alias() + call void @bar() + ret void +} +)IR"); + Function &LLVMF = *M->getFunction("foo"); + sandboxir::Context Ctx(C); + + auto &F = *Ctx.createFunction(&LLVMF); + auto *BB = &*F.begin(); + auto It = BB->begin(); + auto *Call0 = cast<sandboxir::CallInst>(&*It++); + auto *Call1 = cast<sandboxir::CallInst>(&*It++); + auto *Callee1 = 
cast<sandboxir::Constant>(Call1->getCalledOperand()); + auto *Alias = cast<sandboxir::GlobalAlias>(Call0->getCalledOperand()); + // Check setAliasee(). + auto *OrigAliasee = Alias->getAliasee(); + auto *NewAliasee = Callee1; + EXPECT_NE(NewAliasee, OrigAliasee); + Ctx.save(); + Alias->setAliasee(NewAliasee); + EXPECT_EQ(Alias->getAliasee(), NewAliasee); + Ctx.revert(); + EXPECT_EQ(Alias->getAliasee(), OrigAliasee); +} + TEST_F(TrackerTest, SetVolatile) { parseIR(C, R"IR( define void @foo(ptr %arg0, i8 %val) { diff --git a/llvm/unittests/Support/raw_ostream_test.cpp b/llvm/unittests/Support/raw_ostream_test.cpp index 1c6dfb6260cc..99aa350adad7 100644 --- a/llvm/unittests/Support/raw_ostream_test.cpp +++ b/llvm/unittests/Support/raw_ostream_test.cpp @@ -177,6 +177,19 @@ TEST(raw_ostreamTest, Justify) { EXPECT_EQ("none", printToString(center_justify("none", 1), 1)); } +TEST(raw_ostreamTest, Indent) { + indent Indent(4); + auto Spaces = [](int N) { return std::string(N, ' '); }; + EXPECT_EQ(Spaces(4), printToString(Indent)); + EXPECT_EQ("", printToString(indent(0))); + EXPECT_EQ(Spaces(5), printToString(Indent + 1)); + EXPECT_EQ(Spaces(3), printToString(Indent - 1)); + Indent += 1; + EXPECT_EQ(Spaces(5), printToString(Indent)); + Indent -= 1; + EXPECT_EQ(Spaces(4), printToString(Indent)); +} + TEST(raw_ostreamTest, FormatHex) { EXPECT_EQ("0x1234", printToString(format_hex(0x1234, 6), 6)); EXPECT_EQ("0x001234", printToString(format_hex(0x1234, 8), 8)); diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp index f8dd1d3a60a0..5e2edcef09bf 100644 --- a/llvm/unittests/TargetParser/Host.cpp +++ b/llvm/unittests/TargetParser/Host.cpp @@ -83,8 +83,20 @@ TEST(getLinuxHostCPUName, AArch64) { "CPU part : 0xd40"), "neoverse-v1"); EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n" + "CPU part : 0xd4f"), + "neoverse-v2"); + EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n" + "CPU part : 0xd84"), + 
"neoverse-v3"); + EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n" "CPU part : 0xd0c"), "neoverse-n1"); + EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n" + "CPU part : 0xd49"), + "neoverse-n2"); + EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n" + "CPU part : 0xd8e"), + "neoverse-n3"); // Verify that both CPU implementer and CPU part are checked: EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x40\n" "CPU part : 0xd03"), diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt index 488c9c2344b5..10a730290608 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt @@ -9,4 +9,6 @@ set(LLVM_LINK_COMPONENTS add_llvm_unittest(SandboxVectorizerTests DependencyGraphTest.cpp + LegalityTest.cpp + RegionTest.cpp ) diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp new file mode 100644 index 000000000000..a136be41ae36 --- /dev/null +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp @@ -0,0 +1,56 @@ +//===- LegalityTest.cpp ---------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/SandboxIR/SandboxIR.h" +#include "llvm/Support/SourceMgr.h" +#include "gtest/gtest.h" + +using namespace llvm; + +struct LegalityTest : public testing::Test { + LLVMContext C; + std::unique_ptr<Module> M; + + void parseIR(LLVMContext &C, const char *IR) { + SMDiagnostic Err; + M = parseAssemblyString(IR, Err, C); + if (!M) + Err.print("LegalityTest", errs()); + } +}; + +TEST_F(LegalityTest, Legality) { + parseIR(C, R"IR( +define void @foo(ptr %ptr) { + %gep0 = getelementptr float, ptr %ptr, i32 0 + %gep1 = getelementptr float, ptr %ptr, i32 1 + %ld0 = load float, ptr %gep0 + %ld1 = load float, ptr %gep0 + store float %ld0, ptr %gep0 + store float %ld1, ptr %gep1 + ret void +} +)IR"); + llvm::Function *LLVMF = &*M->getFunction("foo"); + sandboxir::Context Ctx(C); + auto *F = Ctx.createFunction(LLVMF); + auto *BB = &*F->begin(); + auto It = BB->begin(); + [[maybe_unused]] auto *Gep0 = cast<sandboxir::GetElementPtrInst>(&*It++); + [[maybe_unused]] auto *Gep1 = cast<sandboxir::GetElementPtrInst>(&*It++); + [[maybe_unused]] auto *Ld0 = cast<sandboxir::LoadInst>(&*It++); + [[maybe_unused]] auto *Ld1 = cast<sandboxir::LoadInst>(&*It++); + auto *St0 = cast<sandboxir::StoreInst>(&*It++); + auto *St1 = cast<sandboxir::StoreInst>(&*It++); + + sandboxir::LegalityAnalysis Legality; + auto Result = Legality.canVectorize({St0, St1}); + EXPECT_TRUE(isa<sandboxir::Widen>(Result)); +} diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/RegionTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/RegionTest.cpp new file mode 100644 index 000000000000..2c7390c515f1 --- /dev/null +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/RegionTest.cpp @@ -0,0 +1,81 @@ +//===- RegionTest.cpp 
-----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Vectorize/SandboxVectorizer/Region.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/SandboxIR/SandboxIR.h" +#include "llvm/Support/SourceMgr.h" +#include "gmock/gmock-matchers.h" +#include "gtest/gtest.h" + +using namespace llvm; + +struct RegionTest : public testing::Test { + LLVMContext C; + std::unique_ptr<Module> M; + + void parseIR(LLVMContext &C, const char *IR) { + SMDiagnostic Err; + M = parseAssemblyString(IR, Err, C); + if (!M) + Err.print("RegionTest", errs()); + } +}; + +TEST_F(RegionTest, Basic) { + parseIR(C, R"IR( +define i8 @foo(i8 %v0, i8 %v1) { + %t0 = add i8 %v0, 1 + %t1 = add i8 %t0, %v1 + ret i8 %t1 +} +)IR"); + llvm::Function *LLVMF = &*M->getFunction("foo"); + sandboxir::Context Ctx(C); + auto *F = Ctx.createFunction(LLVMF); + auto *BB = &*F->begin(); + auto It = BB->begin(); + auto *T0 = cast<sandboxir::Instruction>(&*It++); + auto *T1 = cast<sandboxir::Instruction>(&*It++); + auto *Ret = cast<sandboxir::Instruction>(&*It++); + sandboxir::Region Rgn(Ctx); + + // Check getters + EXPECT_EQ(&Ctx, &Rgn.getContext()); + EXPECT_EQ(0U, Rgn.getID()); + + // Check add / remove / empty. + EXPECT_TRUE(Rgn.empty()); + Rgn.add(T0); + EXPECT_FALSE(Rgn.empty()); + Rgn.remove(T0); + EXPECT_TRUE(Rgn.empty()); + + // Check iteration. + Rgn.add(T0); + Rgn.add(T1); + Rgn.add(Ret); + // Use an ordered matcher because we're supposed to preserve the insertion + // order for determinism. 
+ EXPECT_THAT(Rgn.insts(), testing::ElementsAre(T0, T1, Ret)); + + // Check contains + EXPECT_TRUE(Rgn.contains(T0)); + Rgn.remove(T0); + EXPECT_FALSE(Rgn.contains(T0)); + +#ifndef NDEBUG + // Check equality comparison. Insert in reverse order into `Other` to check + // that comparison is order-independent. + sandboxir::Region Other(Ctx); + Other.add(Ret); + EXPECT_NE(Rgn, Other); + Other.add(T1); + EXPECT_EQ(Rgn, Other); +#endif +} diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp index 6dd6d860273c..4926afbfc6d8 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -101,13 +101,12 @@ TEST_F(VPlanHCFGTest, testBuildHCFGInnerLoop) { raw_string_ostream OS(FullDump); Plan->printDOT(OS); const char *ExpectedStr = R"(digraph VPlan { -graph [labelloc=t, fontsize=30; label="Vectorization Plan\n for UF\>=1\nLive-in vp\<%0\> = vector-trip-count\nvp\<%1\> = original trip-count\n"] +graph [labelloc=t, fontsize=30; label="Vectorization Plan\n for UF\>=1\nLive-in vp\<%0\> = vector-trip-count\nLive-in ir\<%N\> = original trip-count\n"] node [shape=rect, fontname=Courier, fontsize=30] edge [fontname=Courier, fontsize=30] compound=true N0 [label = "ir-bb\<entry\>:\l" + - " EMIT vp\<%1\> = EXPAND SCEV (-1 + %N)\l" + "No successors\l" ] N1 [label = @@ -134,8 +133,8 @@ compound=true N2 -> N4 [ label="" ltail=cluster_N3] N4 [label = "middle.block:\l" + - " EMIT vp\<%2\> = icmp eq vp\<%1\>, vp\<%0\>\l" + - " EMIT branch-on-cond vp\<%2\>\l" + + " EMIT vp\<%1\> = icmp eq ir\<%N\>, vp\<%0\>\l" + + " EMIT branch-on-cond vp\<%1\>\l" + "Successor(s): ir-bb\<for.end\>, scalar.ph\l" ] N4 -> N5 [ label="T"] diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h index e7b511904891..06e091da9054 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h +++ 
b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h @@ -67,10 +67,11 @@ protected: assert(!verifyFunction(F) && "input function must be valid"); doAnalysis(F); - auto Plan = VPlan::createInitialVPlan( - SE->getBackedgeTakenCount(LI->getLoopFor(LoopHeader)), *SE, true, false, - LI->getLoopFor(LoopHeader)); - VPlanHCFGBuilder HCFGBuilder(LI->getLoopFor(LoopHeader), LI.get(), *Plan); + Loop *L = LI->getLoopFor(LoopHeader); + PredicatedScalarEvolution PSE(*SE, *L); + auto Plan = VPlan::createInitialVPlan(IntegerType::get(*Ctx, 64), PSE, true, + false, L); + VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan); HCFGBuilder.buildHierarchicalCFG(); return Plan; } @@ -81,10 +82,11 @@ protected: assert(!verifyFunction(F) && "input function must be valid"); doAnalysis(F); - auto Plan = VPlan::createInitialVPlan( - SE->getBackedgeTakenCount(LI->getLoopFor(LoopHeader)), *SE, true, false, - LI->getLoopFor(LoopHeader)); - VPlanHCFGBuilder HCFGBuilder(LI->getLoopFor(LoopHeader), LI.get(), *Plan); + Loop *L = LI->getLoopFor(LoopHeader); + PredicatedScalarEvolution PSE(*SE, *L); + auto Plan = VPlan::createInitialVPlan(IntegerType::get(*Ctx, 64), PSE, true, + false, L); + VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan); HCFGBuilder.buildPlainCFG(); return Plan; } diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp index 2a94b77af66c..0c03440903fc 100644 --- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp +++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp @@ -634,10 +634,10 @@ struct MatchableInfo { // Compare lexicographically by operand. The matcher validates that other // orderings wouldn't be ambiguous using \see couldMatchAmbiguouslyWith(). 
- for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) { - if (*AsmOperands[i].Class < *RHS.AsmOperands[i].Class) + for (const auto &[LHSOp, RHSOp] : zip_equal(AsmOperands, RHS.AsmOperands)) { + if (*LHSOp.Class < *RHSOp.Class) return true; - if (*RHS.AsmOperands[i].Class < *AsmOperands[i].Class) + if (*RHSOp.Class < *LHSOp.Class) return false; } @@ -692,21 +692,21 @@ struct MatchableInfo { // Tokens and operand kinds are unambiguous (assuming a correct target // specific parser). - for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) - if (AsmOperands[i].Class->Kind != RHS.AsmOperands[i].Class->Kind || - AsmOperands[i].Class->Kind == ClassInfo::Token) - if (*AsmOperands[i].Class < *RHS.AsmOperands[i].Class || - *RHS.AsmOperands[i].Class < *AsmOperands[i].Class) + for (const auto &[LHSOp, RHSOp] : zip_equal(AsmOperands, RHS.AsmOperands)) { + if (LHSOp.Class->Kind != RHSOp.Class->Kind || + LHSOp.Class->Kind == ClassInfo::Token) + if (*LHSOp.Class < *RHSOp.Class || *RHSOp.Class < *LHSOp.Class) return false; + } // Otherwise, this operand could commute if all operands are equivalent, or // there is a pair of operands that compare less than and a pair that // compare greater than. bool HasLT = false, HasGT = false; - for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) { - if (*AsmOperands[i].Class < *RHS.AsmOperands[i].Class) + for (const auto &[LHSOp, RHSOp] : zip_equal(AsmOperands, RHS.AsmOperands)) { + if (*LHSOp.Class < *RHSOp.Class) HasLT = true; - if (*RHS.AsmOperands[i].Class < *AsmOperands[i].Class) + if (*RHSOp.Class < *LHSOp.Class) HasGT = true; } @@ -810,7 +810,7 @@ public: /// getSubtargetFeature - Lookup or create the subtarget feature info for the /// given operand. 
- const SubtargetFeatureInfo *getSubtargetFeature(Record *Def) const { + const SubtargetFeatureInfo *getSubtargetFeature(const Record *Def) const { assert(Def->isSubClassOf("Predicate") && "Invalid predicate type!"); const auto &I = SubtargetFeatures.find(Def); return I == SubtargetFeatures.end() ? nullptr : &I->second; @@ -833,9 +833,8 @@ LLVM_DUMP_METHOD void MatchableInfo::dump() const { errs() << " variant: " << AsmVariantID << "\n"; - for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) { - const AsmOperand &Op = AsmOperands[i]; - errs() << " op[" << i << "] = " << Op.Class->ClassName << " - "; + for (const auto &[Idx, Op] : enumerate(AsmOperands)) { + errs() << " op[" << Idx << "] = " << Op.Class->ClassName << " - "; errs() << '\"' << Op.Token << "\"\n"; } } @@ -1490,21 +1489,18 @@ void AsmMatcherInfo::buildOperandMatchInfo() { // Keep track of all operands of this instructions which belong to the // same class. unsigned NumOptionalOps = 0; - for (unsigned i = 0, e = MI->AsmOperands.size(); i != e; ++i) { - const MatchableInfo::AsmOperand &Op = MI->AsmOperands[i]; + for (const auto &[Idx, Op] : enumerate(MI->AsmOperands)) { if (CallCustomParserForAllOperands || !Op.Class->ParserMethod.empty()) { unsigned &OperandMask = OpClassMask[Op.Class]; OperandMask |= maskTrailingOnes<unsigned>(NumOptionalOps + 1) - << (i - NumOptionalOps); + << (Idx - NumOptionalOps); } if (Op.Class->IsOptional) ++NumOptionalOps; } // Generate operand match info for each mnemonic/operand class pair. - for (const auto &OCM : OpClassMask) { - unsigned OpMask = OCM.second; - ClassInfo *CI = OCM.first; + for (const auto [CI, OpMask] : OpClassMask) { OperandMatchInfo.push_back( OperandMatchEntry::create(MI.get(), CI, OpMask)); } @@ -1613,11 +1609,11 @@ void AsmMatcherInfo::buildInfo() { for (auto &II : Matchables) { // Parse the tokens after the mnemonic. // Note: buildInstructionOperandReference may insert new AsmOperands, so - // don't precompute the loop bound. 
- for (unsigned i = 0; i != II->AsmOperands.size(); ++i) { - MatchableInfo::AsmOperand &Op = II->AsmOperands[i]; + // don't precompute the loop bound, i.e., cannot use range based for loop + // here. + for (size_t Idx = 0; Idx < II->AsmOperands.size(); ++Idx) { + MatchableInfo::AsmOperand &Op = II->AsmOperands[Idx]; StringRef Token = Op.Token; - // Check for singleton registers. if (const Record *RegRecord = Op.SingletonReg) { Op.Class = RegisterClasses[RegRecord]; @@ -1645,7 +1641,7 @@ void AsmMatcherInfo::buildInfo() { OperandName = Token.substr(1); if (isa<const CodeGenInstruction *>(II->DefRec)) - buildInstructionOperandReference(II.get(), OperandName, i); + buildInstructionOperandReference(II.get(), OperandName, Idx); else buildAliasOperandReference(II.get(), OperandName, Op); } @@ -1779,21 +1775,21 @@ void AsmMatcherInfo::buildAliasOperandReference(MatchableInfo *II, const CodeGenInstAlias &CGA = *cast<const CodeGenInstAlias *>(II->DefRec); // Set up the operand class. - for (unsigned i = 0, e = CGA.ResultOperands.size(); i != e; ++i) - if (CGA.ResultOperands[i].isRecord() && - CGA.ResultOperands[i].getName() == OperandName) { + for (const auto &[ResultOp, SubOpIdx] : + zip_equal(CGA.ResultOperands, CGA.ResultInstOperandIndex)) { + if (ResultOp.isRecord() && ResultOp.getName() == OperandName) { // It's safe to go with the first one we find, because CodeGenInstAlias // validates that all operands with the same name have the same record. - Op.SubOpIdx = CGA.ResultInstOperandIndex[i].second; + Op.SubOpIdx = SubOpIdx.second; // Use the match class from the Alias definition, not the // destination instruction, as we may have an immediate that's // being munged by the match class. 
- Op.Class = - getOperandClass(CGA.ResultOperands[i].getRecord(), Op.SubOpIdx); + Op.Class = getOperandClass(ResultOp.getRecord(), Op.SubOpIdx); Op.SrcOpName = OperandName; Op.OrigSrcOpName = OperandName; return; } + } PrintFatalError(II->TheDef->getLoc(), "error: unable to find operand: '" + OperandName + "'"); @@ -1862,13 +1858,11 @@ void MatchableInfo::buildAliasResultOperands(bool AliasConstraintsAreChecked) { // populate them. unsigned AliasOpNo = 0; unsigned LastOpNo = CGA.ResultInstOperandIndex.size(); - for (unsigned i = 0, e = ResultInst->Operands.size(); i != e; ++i) { - const CGIOperandList::OperandInfo *OpInfo = &ResultInst->Operands[i]; - + for (const auto &[Idx, OpInfo] : enumerate(ResultInst->Operands)) { // If this is a tied operand, just copy from the previously handled operand. int TiedOp = -1; - if (OpInfo->MINumOperands == 1) - TiedOp = OpInfo->getTiedRegister(); + if (OpInfo.MINumOperands == 1) + TiedOp = OpInfo.getTiedRegister(); if (TiedOp != -1) { unsigned SrcOp1 = 0; unsigned SrcOp2 = 0; @@ -1898,7 +1892,7 @@ void MatchableInfo::buildAliasResultOperands(bool AliasConstraintsAreChecked) { // to benefit from the tied-operands check and just match the operand // as a normal, but not copy the original (TiedOp) to the result // instruction. We do this by passing -1 as the tied operand to copy. - if (ResultInst->Operands[i].Rec->getName() != + if (OpInfo.Rec->getName() != ResultInst->Operands[TiedOp].Rec->getName()) { SrcOp1 = ResOperands[TiedOp].AsmOperandNum; int SubIdx = CGA.ResultInstOperandIndex[AliasOpNo].second; @@ -1913,9 +1907,9 @@ void MatchableInfo::buildAliasResultOperands(bool AliasConstraintsAreChecked) { } // Handle all the suboperands for this operand. 
- const std::string &OpName = OpInfo->Name; + const std::string &OpName = OpInfo.Name; for (; AliasOpNo < LastOpNo && - CGA.ResultInstOperandIndex[AliasOpNo].first == i; + CGA.ResultInstOperandIndex[AliasOpNo].first == Idx; ++AliasOpNo) { int SubIdx = CGA.ResultInstOperandIndex[AliasOpNo].second; @@ -1935,7 +1929,7 @@ void MatchableInfo::buildAliasResultOperands(bool AliasConstraintsAreChecked) { // record won't be updated and it will fail later on. OperandRefs.try_emplace(Name, SrcOperand); - unsigned NumOperands = (SubIdx == -1 ? OpInfo->MINumOperands : 1); + unsigned NumOperands = (SubIdx == -1 ? OpInfo.MINumOperands : 1); ResOperands.push_back( ResOperand::getRenderedOp(SrcOperand, NumOperands)); break; @@ -2110,9 +2104,7 @@ emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName, // Compute the convert enum and the case body. MaxRowLength = std::max(MaxRowLength, II->ResOperands.size() * 2 + 1); - for (unsigned i = 0, e = II->ResOperands.size(); i != e; ++i) { - const MatchableInfo::ResOperand &OpInfo = II->ResOperands[i]; - + for (const auto &[Idx, OpInfo] : enumerate(II->ResOperands)) { // Generate code to populate each result operand. 
switch (OpInfo.Kind) { case MatchableInfo::ResOperand::RenderAsmOperand: { @@ -2194,7 +2186,7 @@ emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName, uint8_t TiedOp = OpInfo.TiedOperands.ResOpnd; uint8_t SrcOp1 = OpInfo.TiedOperands.SrcOpnd1Idx + HasMnemonicFirst; uint8_t SrcOp2 = OpInfo.TiedOperands.SrcOpnd2Idx + HasMnemonicFirst; - assert((i > TiedOp || TiedOp == (uint8_t)-1) && + assert((Idx > TiedOp || TiedOp == (uint8_t)-1) && "Tied operand precedes its target!"); auto TiedTupleName = std::string("Tie") + utostr(TiedOp) + '_' + utostr(SrcOp1) + '_' + utostr(SrcOp2); @@ -2730,26 +2722,21 @@ static void emitGetSubtargetFeatureName(AsmMatcherInfo &Info, raw_ostream &OS) { OS << "}\n\n"; } -static std::string GetAliasRequiredFeatures(Record *R, +static std::string GetAliasRequiredFeatures(const Record *R, const AsmMatcherInfo &Info) { - std::vector<Record *> ReqFeatures = R->getValueAsListOfDefs("Predicates"); std::string Result; - if (ReqFeatures.empty()) - return Result; - - for (unsigned i = 0, e = ReqFeatures.size(); i != e; ++i) { - const SubtargetFeatureInfo *F = Info.getSubtargetFeature(ReqFeatures[i]); - + bool First = true; + for (const Record *RF : R->getValueAsListOfDefs("Predicates")) { + const SubtargetFeatureInfo *F = Info.getSubtargetFeature(RF); if (!F) PrintFatalError(R->getLoc(), - "Predicate '" + ReqFeatures[i]->getName() + + "Predicate '" + RF->getName() + "' is not marked as an AssemblerPredicate!"); - - if (i) + if (!First) Result += " && "; - Result += "Features.test(" + F->getEnumBitName() + ')'; + First = false; } return Result; @@ -2778,16 +2765,14 @@ emitMnemonicAliasVariant(raw_ostream &OS, const AsmMatcherInfo &Info, // by the string remapper. std::vector<StringMatcher::StringPair> Cases; for (const auto &AliasEntry : AliasesFromMnemonic) { - const std::vector<Record *> &ToVec = AliasEntry.second; - // Loop through each alias and emit code that handles each case. 
If there // are two instructions without predicates, emit an error. If there is one, // emit it last. std::string MatchCode; int AliasWithNoPredicate = -1; - for (unsigned i = 0, e = ToVec.size(); i != e; ++i) { - Record *R = ToVec[i]; + ArrayRef<const Record *> ToVec = AliasEntry.second; + for (const auto &[Idx, R] : enumerate(ToVec)) { std::string FeatureMask = GetAliasRequiredFeatures(R, Info); // If this unconditionally matches, remember it for later and diagnose @@ -2804,7 +2789,7 @@ emitMnemonicAliasVariant(raw_ostream &OS, const AsmMatcherInfo &Info, PrintFatalError(R->getLoc(), "this is the other MnemonicAlias."); } - AliasWithNoPredicate = i; + AliasWithNoPredicate = Idx; continue; } if (R->getValueAsString("ToMnemonic") == AliasEntry.first) @@ -2819,7 +2804,7 @@ emitMnemonicAliasVariant(raw_ostream &OS, const AsmMatcherInfo &Info, } if (AliasWithNoPredicate != -1) { - Record *R = ToVec[AliasWithNoPredicate]; + const Record *R = ToVec[AliasWithNoPredicate]; if (!MatchCode.empty()) MatchCode += "else\n "; MatchCode += "Mnemonic = \""; @@ -2955,8 +2940,8 @@ emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target, if (II.RequiredFeatures.empty()) OS << "_None"; else - for (unsigned i = 0, e = II.RequiredFeatures.size(); i != e; ++i) - OS << '_' << II.RequiredFeatures[i]->TheDef->getName(); + for (const auto &F : II.RequiredFeatures) + OS << '_' << F->TheDef->getName(); OS << " },\n"; } @@ -3467,24 +3452,20 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { if (MI->RequiredFeatures.empty()) continue; FeatureBitsets.emplace_back(); - for (unsigned I = 0, E = MI->RequiredFeatures.size(); I != E; ++I) - FeatureBitsets.back().push_back(MI->RequiredFeatures[I]->TheDef); + for (const auto *F : MI->RequiredFeatures) + FeatureBitsets.back().push_back(F->TheDef); } - llvm::sort(FeatureBitsets, [&](const std::vector<const Record *> &A, - const std::vector<const Record *> &B) { - if (A.size() < B.size()) - return true; - if (A.size() > B.size()) - return false; - 
for (auto Pair : zip(A, B)) { - if (std::get<0>(Pair)->getName() < std::get<1>(Pair)->getName()) - return true; - if (std::get<0>(Pair)->getName() > std::get<1>(Pair)->getName()) - return false; - } - return false; - }); + llvm::sort(FeatureBitsets, + [&](ArrayRef<const Record *> A, ArrayRef<const Record *> B) { + if (A.size() != B.size()) + return A.size() < B.size(); + for (const auto [ARec, BRec] : zip_equal(A, B)) { + if (ARec->getName() != BRec->getName()) + return ARec->getName() < BRec->getName(); + } + return false; + }); FeatureBitsets.erase(llvm::unique(FeatureBitsets), FeatureBitsets.end()); OS << "// Feature bitsets.\n" << "enum : " << getMinimalTypeForRange(FeatureBitsets.size()) << " {\n" @@ -3577,8 +3558,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { if (MI->RequiredFeatures.empty()) OS << "_None"; else - for (unsigned i = 0, e = MI->RequiredFeatures.size(); i != e; ++i) - OS << '_' << MI->RequiredFeatures[i]->TheDef->getName(); + for (const auto &F : MI->RequiredFeatures) + OS << '_' << F->TheDef->getName(); OS << ", { "; ListSeparator LS; diff --git a/llvm/utils/TableGen/AsmWriterEmitter.cpp b/llvm/utils/TableGen/AsmWriterEmitter.cpp index cbf3a380b442..3f1f937e5fd8 100644 --- a/llvm/utils/TableGen/AsmWriterEmitter.cpp +++ b/llvm/utils/TableGen/AsmWriterEmitter.cpp @@ -55,13 +55,13 @@ using namespace llvm; namespace { class AsmWriterEmitter { - RecordKeeper &Records; + const RecordKeeper &Records; CodeGenTarget Target; ArrayRef<const CodeGenInstruction *> NumberedInstructions; std::vector<AsmWriterInst> Instructions; public: - AsmWriterEmitter(RecordKeeper &R); + AsmWriterEmitter(const RecordKeeper &R); void run(raw_ostream &o); @@ -326,7 +326,7 @@ void AsmWriterEmitter::EmitGetMnemonic( raw_ostream &O, std::vector<std::vector<std::string>> &TableDrivenOperandPrinters, unsigned &BitsLeft, unsigned &AsmStrBits) { - Record *AsmWriter = Target.getAsmWriter(); + const Record *AsmWriter = Target.getAsmWriter(); StringRef ClassName = 
AsmWriter->getValueAsString("AsmWriterClassName"); bool PassSubtarget = AsmWriter->getValueAsInt("PassSubtarget"); @@ -486,7 +486,7 @@ void AsmWriterEmitter::EmitPrintInstruction( std::vector<std::vector<std::string>> &TableDrivenOperandPrinters, unsigned &BitsLeft, unsigned &AsmStrBits) { const unsigned OpcodeInfoBits = 64; - Record *AsmWriter = Target.getAsmWriter(); + const Record *AsmWriter = Target.getAsmWriter(); StringRef ClassName = AsmWriter->getValueAsString("AsmWriterClassName"); bool PassSubtarget = AsmWriter->getValueAsInt("PassSubtarget"); @@ -596,8 +596,8 @@ emitRegisterNameString(raw_ostream &O, StringRef AltName, AsmName = std::string(Reg.getName()); } else { // Make sure the register has an alternate name for this index. - std::vector<Record *> AltNameList = - Reg.TheDef->getValueAsListOfDefs("RegAltNameIndices"); + std::vector<const Record *> AltNameList = + Reg.TheDef->getValueAsListOfConstDefs("RegAltNameIndices"); unsigned Idx = 0, e; for (e = AltNameList.size(); Idx < e && (AltNameList[Idx]->getName() != AltName); ++Idx) @@ -633,7 +633,7 @@ emitRegisterNameString(raw_ostream &O, StringRef AltName, } void AsmWriterEmitter::EmitGetRegisterName(raw_ostream &O) { - Record *AsmWriter = Target.getAsmWriter(); + const Record *AsmWriter = Target.getAsmWriter(); StringRef ClassName = AsmWriter->getValueAsString("AsmWriterClassName"); const auto &Registers = Target.getRegBank().getRegisters(); ArrayRef<const Record *> AltNameIndices = Target.getRegAltNameIndices(); @@ -829,7 +829,7 @@ struct AliasPriorityComparator { } // end anonymous namespace void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { - Record *AsmWriter = Target.getAsmWriter(); + const Record *AsmWriter = Target.getAsmWriter(); O << "\n#ifdef PRINT_ALIAS_INSTR\n"; O << "#undef PRINT_ALIAS_INSTR\n\n"; @@ -843,14 +843,11 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { unsigned Variant = AsmWriter->getValueAsInt("Variant"); bool PassSubtarget = 
AsmWriter->getValueAsInt("PassSubtarget"); - std::vector<Record *> AllInstAliases = - Records.getAllDerivedDefinitions("InstAlias"); - // Create a map from the qualified name to a list of potential matches. typedef std::set<std::pair<CodeGenInstAlias, int>, AliasPriorityComparator> AliasWithPriority; std::map<std::string, AliasWithPriority> AliasMap; - for (Record *R : AllInstAliases) { + for (const Record *R : Records.getAllDerivedDefinitions("InstAlias")) { int Priority = R->getValueAsInt("EmitPriority"); if (Priority < 1) continue; // Aliases with priority 0 are never emitted. @@ -1011,17 +1008,17 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { MIOpNum += RO.getMINumOperands(); } - std::vector<Record *> ReqFeatures; + std::vector<const Record *> ReqFeatures; if (PassSubtarget) { // We only consider ReqFeatures predicates if PassSubtarget - std::vector<Record *> RF = - CGA.TheDef->getValueAsListOfDefs("Predicates"); - copy_if(RF, std::back_inserter(ReqFeatures), [](Record *R) { + std::vector<const Record *> RF = + CGA.TheDef->getValueAsListOfConstDefs("Predicates"); + copy_if(RF, std::back_inserter(ReqFeatures), [](const Record *R) { return R->getValueAsBit("AssemblerMatcherPredicate"); }); } - for (Record *const R : ReqFeatures) { + for (const Record *R : ReqFeatures) { const DagInit *D = R->getValueAsDag("AssemblerCondDag"); auto *Op = dyn_cast<DefInit>(D->getOperator()); if (!Op) @@ -1315,17 +1312,17 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { O << "#endif // PRINT_ALIAS_INSTR\n"; } -AsmWriterEmitter::AsmWriterEmitter(RecordKeeper &R) : Records(R), Target(R) { - Record *AsmWriter = Target.getAsmWriter(); +AsmWriterEmitter::AsmWriterEmitter(const RecordKeeper &R) + : Records(R), Target(R) { + const Record *AsmWriter = Target.getAsmWriter(); unsigned Variant = AsmWriter->getValueAsInt("Variant"); // Get the instruction numbering. 
NumberedInstructions = Target.getInstructionsByEnumValue(); - for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) { - const CodeGenInstruction *I = NumberedInstructions[i]; + for (const auto &[Idx, I] : enumerate(NumberedInstructions)) { if (!I->AsmString.empty() && I->TheDef->getName() != "PHI") - Instructions.emplace_back(*I, i, Variant); + Instructions.emplace_back(*I, Idx, Variant); } } diff --git a/llvm/utils/TableGen/CallingConvEmitter.cpp b/llvm/utils/TableGen/CallingConvEmitter.cpp index 6a3030bfc1b7..8876bb3ad31e 100644 --- a/llvm/utils/TableGen/CallingConvEmitter.cpp +++ b/llvm/utils/TableGen/CallingConvEmitter.cpp @@ -22,7 +22,7 @@ using namespace llvm; namespace { class CallingConvEmitter { - RecordKeeper &Records; + const RecordKeeper &Records; unsigned Counter = 0u; std::string CurrentAction; bool SwiftAction = false; @@ -32,13 +32,13 @@ class CallingConvEmitter { std::map<std::string, std::set<std::string>> DelegateToMap; public: - explicit CallingConvEmitter(RecordKeeper &R) : Records(R) {} + explicit CallingConvEmitter(const RecordKeeper &R) : Records(R) {} void run(raw_ostream &o); private: - void EmitCallingConv(Record *CC, raw_ostream &O); - void EmitAction(Record *Action, unsigned Indent, raw_ostream &O); + void EmitCallingConv(const Record *CC, raw_ostream &O); + void EmitAction(const Record *Action, unsigned Indent, raw_ostream &O); void EmitArgRegisterLists(raw_ostream &O); }; } // End anonymous namespace @@ -46,13 +46,14 @@ private: void CallingConvEmitter::run(raw_ostream &O) { emitSourceFileHeader("Calling Convention Implementation Fragment", O); - std::vector<Record *> CCs = Records.getAllDerivedDefinitions("CallingConv"); + ArrayRef<const Record *> CCs = + Records.getAllDerivedDefinitions("CallingConv"); // Emit prototypes for all of the non-custom CC's so that they can forward ref // each other. 
Records.startTimer("Emit prototypes"); O << "#ifndef GET_CC_REGISTER_LISTS\n\n"; - for (Record *CC : CCs) { + for (const Record *CC : CCs) { if (!CC->getValueAsBit("Custom")) { unsigned Pad = CC->getName().size(); if (CC->getValueAsBit("Entry")) { @@ -71,7 +72,7 @@ void CallingConvEmitter::run(raw_ostream &O) { // Emit each non-custom calling convention description in full. Records.startTimer("Emit full descriptions"); - for (Record *CC : CCs) { + for (const Record *CC : CCs) { if (!CC->getValueAsBit("Custom")) { EmitCallingConv(CC, O); } @@ -82,8 +83,8 @@ void CallingConvEmitter::run(raw_ostream &O) { O << "\n#endif // CC_REGISTER_LIST\n"; } -void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) { - ListInit *CCActions = CC->getValueAsListInit("Actions"); +void CallingConvEmitter::EmitCallingConv(const Record *CC, raw_ostream &O) { + const ListInit *CCActions = CC->getValueAsListInit("Actions"); Counter = 0; CurrentAction = CC->getName().str(); @@ -106,7 +107,7 @@ void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) { << std::string(Pad, ' ') << "ISD::ArgFlagsTy ArgFlags, CCState &State) {\n"; // Emit all of the actions, in order. 
for (unsigned i = 0, e = CCActions->size(); i != e; ++i) { - Record *Action = CCActions->getElementAsRecord(i); + const Record *Action = CCActions->getElementAsRecord(i); SwiftAction = llvm::any_of(Action->getSuperClasses(), [](const std::pair<Record *, SMRange> &Class) { @@ -122,7 +123,7 @@ void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) { O << "}\n"; } -void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent, +void CallingConvEmitter::EmitAction(const Record *Action, unsigned Indent, raw_ostream &O) { std::string IndentStr = std::string(Indent, ' '); @@ -150,14 +151,14 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent, O << IndentStr << "}\n"; } else { if (Action->isSubClassOf("CCDelegateTo")) { - Record *CC = Action->getValueAsDef("CC"); + const Record *CC = Action->getValueAsDef("CC"); O << IndentStr << "if (!" << CC->getName() << "(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))\n" << IndentStr << " return false;\n"; DelegateToMap[CurrentAction].insert(CC->getName().str()); } else if (Action->isSubClassOf("CCAssignToReg") || Action->isSubClassOf("CCAssignToRegAndStack")) { - ListInit *RegList = Action->getValueAsListInit("RegList"); + const ListInit *RegList = Action->getValueAsListInit("RegList"); if (RegList->size() == 1) { std::string Name = getQualifiedName(RegList->getElementAsRecord(0)); O << IndentStr << "if (MCRegister Reg = State.AllocateReg(" << Name @@ -210,8 +211,9 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent, O << IndentStr << " return false;\n"; O << IndentStr << "}\n"; } else if (Action->isSubClassOf("CCAssignToRegWithShadow")) { - ListInit *RegList = Action->getValueAsListInit("RegList"); - ListInit *ShadowRegList = Action->getValueAsListInit("ShadowRegList"); + const ListInit *RegList = Action->getValueAsListInit("RegList"); + const ListInit *ShadowRegList = + Action->getValueAsListInit("ShadowRegList"); if (!ShadowRegList->empty() && ShadowRegList->size() != 
RegList->size()) PrintFatalError(Action->getLoc(), "Invalid length of list of shadowed registers"); @@ -278,7 +280,8 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent, } else if (Action->isSubClassOf("CCAssignToStackWithShadow")) { int Size = Action->getValueAsInt("Size"); int Align = Action->getValueAsInt("Align"); - ListInit *ShadowRegList = Action->getValueAsListInit("ShadowRegList"); + const ListInit *ShadowRegList = + Action->getValueAsListInit("ShadowRegList"); unsigned ShadowRegListNumber = ++Counter; @@ -297,7 +300,7 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent, << Counter << ", LocVT, LocInfo));\n"; O << IndentStr << "return false;\n"; } else if (Action->isSubClassOf("CCPromoteToType")) { - Record *DestTy = Action->getValueAsDef("DestTy"); + const Record *DestTy = Action->getValueAsDef("DestTy"); MVT::SimpleValueType DestVT = getValueType(DestTy); O << IndentStr << "LocVT = " << getEnumName(DestVT) << ";\n"; if (MVT(DestVT).isFloatingPoint()) { @@ -311,7 +314,7 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent, << IndentStr << " LocInfo = CCValAssign::AExt;\n"; } } else if (Action->isSubClassOf("CCPromoteToUpperBitsInType")) { - Record *DestTy = Action->getValueAsDef("DestTy"); + const Record *DestTy = Action->getValueAsDef("DestTy"); MVT::SimpleValueType DestVT = getValueType(DestTy); O << IndentStr << "LocVT = " << getEnumName(DestVT) << ";\n"; if (MVT(DestVT).isFloatingPoint()) { @@ -327,17 +330,17 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent, << IndentStr << " LocInfo = CCValAssign::AExtUpper;\n"; } } else if (Action->isSubClassOf("CCBitConvertToType")) { - Record *DestTy = Action->getValueAsDef("DestTy"); + const Record *DestTy = Action->getValueAsDef("DestTy"); O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) << ";\n"; O << IndentStr << "LocInfo = CCValAssign::BCvt;\n"; } else if (Action->isSubClassOf("CCTruncToType")) { - Record *DestTy = 
Action->getValueAsDef("DestTy"); + const Record *DestTy = Action->getValueAsDef("DestTy"); O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) << ";\n"; O << IndentStr << "LocInfo = CCValAssign::Trunc;\n"; } else if (Action->isSubClassOf("CCPassIndirect")) { - Record *DestTy = Action->getValueAsDef("DestTy"); + const Record *DestTy = Action->getValueAsDef("DestTy"); O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) << ";\n"; O << IndentStr << "LocInfo = CCValAssign::Indirect;\n"; diff --git a/llvm/utils/TableGen/CodeEmitterGen.cpp b/llvm/utils/TableGen/CodeEmitterGen.cpp index 69ca9a84953a..4d356774f98d 100644 --- a/llvm/utils/TableGen/CodeEmitterGen.cpp +++ b/llvm/utils/TableGen/CodeEmitterGen.cpp @@ -47,28 +47,30 @@ using namespace llvm; namespace { class CodeEmitterGen { - RecordKeeper &Records; + const RecordKeeper &Records; public: - CodeEmitterGen(RecordKeeper &R) : Records(R) {} + CodeEmitterGen(const RecordKeeper &R) : Records(R) {} void run(raw_ostream &o); private: - int getVariableBit(const std::string &VarName, BitsInit *BI, int bit); + int getVariableBit(const std::string &VarName, const BitsInit *BI, int bit); std::pair<std::string, std::string> - getInstructionCases(Record *R, CodeGenTarget &Target); - void addInstructionCasesForEncoding(Record *R, const Record *EncodingDef, - CodeGenTarget &Target, std::string &Case, + getInstructionCases(const Record *R, const CodeGenTarget &Target); + void addInstructionCasesForEncoding(const Record *R, + const Record *EncodingDef, + const CodeGenTarget &Target, + std::string &Case, std::string &BitOffsetCase); - bool addCodeToMergeInOperand(Record *R, BitsInit *BI, + bool addCodeToMergeInOperand(const Record *R, const BitsInit *BI, const std::string &VarName, std::string &Case, std::string &BitOffsetCase, - CodeGenTarget &Target); + const CodeGenTarget &Target); void emitInstructionBaseValues( raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions, - CodeGenTarget 
&Target, unsigned HwMode = DefaultMode); + const CodeGenTarget &Target, unsigned HwMode = DefaultMode); void emitCaseMap(raw_ostream &o, const std::map<std::string, std::vector<std::string>> &CaseMap); @@ -78,13 +80,13 @@ private: // If the VarBitInit at position 'bit' matches the specified variable then // return the variable bit position. Otherwise return -1. -int CodeEmitterGen::getVariableBit(const std::string &VarName, BitsInit *BI, - int bit) { - if (VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) { - if (VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar())) +int CodeEmitterGen::getVariableBit(const std::string &VarName, + const BitsInit *BI, int bit) { + if (const VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) { + if (const VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar())) if (VI->getName() == VarName) return VBI->getBitNum(); - } else if (VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) { + } else if (const VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) { if (VI->getName() == VarName) return 0; } @@ -93,11 +95,12 @@ int CodeEmitterGen::getVariableBit(const std::string &VarName, BitsInit *BI, } // Returns true if it succeeds, false if an error. -bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI, +bool CodeEmitterGen::addCodeToMergeInOperand(const Record *R, + const BitsInit *BI, const std::string &VarName, std::string &Case, std::string &BitOffsetCase, - CodeGenTarget &Target) { + const CodeGenTarget &Target) { CodeGenInstruction &CGI = Target.getInstruction(R); // Determine if VarName actually contributes to the Inst encoding. 
@@ -278,7 +281,8 @@ bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI, } std::pair<std::string, std::string> -CodeEmitterGen::getInstructionCases(Record *R, CodeGenTarget &Target) { +CodeEmitterGen::getInstructionCases(const Record *R, + const CodeGenTarget &Target) { std::string Case, BitOffsetCase; auto append = [&](const std::string &S) { @@ -287,7 +291,7 @@ CodeEmitterGen::getInstructionCases(Record *R, CodeGenTarget &Target) { }; if (const RecordVal *RV = R->getValue("EncodingInfos")) { - if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { + if (const auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { const CodeGenHwModes &HWM = Target.getHwModes(); EncodingInfoByHwMode EBM(DI->getDef(), HWM); @@ -342,7 +346,7 @@ CodeEmitterGen::getInstructionCases(Record *R, CodeGenTarget &Target) { } void CodeEmitterGen::addInstructionCasesForEncoding( - Record *R, const Record *EncodingDef, CodeGenTarget &Target, + const Record *R, const Record *EncodingDef, const CodeGenTarget &Target, std::string &Case, std::string &BitOffsetCase) { BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst"); @@ -394,7 +398,7 @@ static void emitInstBits(raw_ostream &OS, const APInt &Bits) { void CodeEmitterGen::emitInstructionBaseValues( raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions, - CodeGenTarget &Target, unsigned HwMode) { + const CodeGenTarget &Target, unsigned HwMode) { const CodeGenHwModes &HWM = Target.getHwModes(); if (HwMode == DefaultMode) o << " static const uint64_t InstBits[] = {\n"; @@ -430,12 +434,12 @@ void CodeEmitterGen::emitInstructionBaseValues( } } } - BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst"); + const BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst"); // Start by filling in fixed values. 
APInt Value(BitWidth, 0); for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) { - if (auto *B = dyn_cast<BitInit>(BI->getBit(i)); B && B->getValue()) + if (const auto *B = dyn_cast<BitInit>(BI->getBit(i)); B && B->getValue()) Value.setBit(i); } o << " "; @@ -448,15 +452,13 @@ void CodeEmitterGen::emitInstructionBaseValues( void CodeEmitterGen::emitCaseMap( raw_ostream &o, const std::map<std::string, std::vector<std::string>> &CaseMap) { - std::map<std::string, std::vector<std::string>>::const_iterator IE, EE; - for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) { - const std::string &Case = IE->first; - const std::vector<std::string> &InstList = IE->second; - - for (int i = 0, N = InstList.size(); i < N; i++) { - if (i) + for (const auto &[Case, InstList] : CaseMap) { + bool First = true; + for (const auto &Inst : InstList) { + if (!First) o << "\n"; - o << " case " << InstList[i] << ":"; + o << " case " << Inst << ":"; + First = false; } o << " {\n"; o << Case; @@ -469,7 +471,6 @@ void CodeEmitterGen::run(raw_ostream &o) { emitSourceFileHeader("Machine Code Emitter", o); CodeGenTarget Target(Records); - std::vector<Record *> Insts = Records.getAllDerivedDefinitions("Instruction"); // For little-endian instruction bit encodings, reverse the bit order Target.reverseBitsForLittleEndianEncoding(); @@ -491,17 +492,17 @@ void CodeEmitterGen::run(raw_ostream &o) { continue; if (const RecordVal *RV = R->getValue("EncodingInfos")) { - if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { + if (const DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { EncodingInfoByHwMode EBM(DI->getDef(), HWM); - for (auto &KV : EBM) { - BitsInit *BI = KV.second->getValueAsBitsInit("Inst"); + for (const auto &[Key, Value] : EBM) { + const BitsInit *BI = Value->getValueAsBitsInit("Inst"); BitWidth = std::max(BitWidth, BI->getNumBits()); - HwModes.insert(KV.first); + HwModes.insert(Key); } continue; } } - BitsInit *BI = R->getValueAsBitsInit("Inst"); + const 
BitsInit *BI = R->getValueAsBitsInit("Inst"); BitWidth = std::max(BitWidth, BI->getNumBits()); } UseAPInt = BitWidth > 64; @@ -540,7 +541,7 @@ void CodeEmitterGen::run(raw_ostream &o) { std::map<std::string, std::vector<std::string>> BitOffsetCaseMap; // Construct all cases statement for each opcode - for (Record *R : Insts) { + for (const Record *R : Records.getAllDerivedDefinitions("Instruction")) { if (R->getValueAsString("Namespace") == "TargetOpcode" || R->getValueAsBit("isPseudo")) continue; diff --git a/llvm/utils/TableGen/CodeGenMapTable.cpp b/llvm/utils/TableGen/CodeGenMapTable.cpp index 46aad7f7f8bd..b599ee149bcd 100644 --- a/llvm/utils/TableGen/CodeGenMapTable.cpp +++ b/llvm/utils/TableGen/CodeGenMapTable.cpp @@ -77,12 +77,15 @@ #include "Common/CodeGenInstruction.h" #include "Common/CodeGenTarget.h" +#include "TableGenBackends.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" -using namespace llvm; -typedef std::map<std::string, std::vector<Record *>> InstrRelMapTy; -typedef std::map<std::vector<Init *>, std::vector<Record *>> RowInstrMapTy; +using namespace llvm; +typedef std::map<std::string, std::vector<const Record *>> InstrRelMapTy; +typedef std::map<std::vector<const Init *>, std::vector<const Record *>> + RowInstrMapTy; namespace { @@ -92,13 +95,13 @@ class InstrMap { private: std::string Name; std::string FilterClass; - ListInit *RowFields; - ListInit *ColFields; - ListInit *KeyCol; - std::vector<ListInit *> ValueCols; + const ListInit *RowFields; + const ListInit *ColFields; + const ListInit *KeyCol; + std::vector<const ListInit *> ValueCols; public: - InstrMap(Record *MapRec) { + InstrMap(const Record *MapRec) { Name = std::string(MapRec->getName()); // FilterClass - It's used to reduce the search space only to the @@ -133,8 +136,8 @@ public: MapRec->getName() + "' has empty " + "`ValueCols' field!"); - for (Init *I : ColValList->getValues()) { - auto *ColI = cast<ListInit>(I); + for 
(const Init *I : ColValList->getValues()) { + const auto *ColI = cast<ListInit>(I); // Make sure that all the sub-lists in 'ValueCols' have same number of // elements as the fields in 'ColFields'. @@ -148,18 +151,12 @@ public: } const std::string &getName() const { return Name; } - const std::string &getFilterClass() const { return FilterClass; } - - ListInit *getRowFields() const { return RowFields; } - - ListInit *getColFields() const { return ColFields; } - - ListInit *getKeyCol() const { return KeyCol; } - - const std::vector<ListInit *> &getValueCols() const { return ValueCols; } + const ListInit *getRowFields() const { return RowFields; } + const ListInit *getColFields() const { return ColFields; } + const ListInit *getKeyCol() const { return KeyCol; } + ArrayRef<const ListInit *> getValueCols() const { return ValueCols; } }; -} // end anonymous namespace //===----------------------------------------------------------------------===// // class MapTableEmitter : It builds the instruction relation maps using @@ -167,7 +164,6 @@ public: // relationship maps as tables into XXXGenInstrInfo.inc file along with the // functions to query them. -namespace { class MapTableEmitter { private: // std::string TargetName; @@ -177,18 +173,19 @@ private: // InstrDefs - list of instructions filtered using FilterClass defined // in InstrMapDesc. - std::vector<Record *> InstrDefs; + ArrayRef<const Record *> InstrDefs; // RowInstrMap - maps RowFields values to the instructions. It's keyed by the // values of the row fields and contains vector of records as values. RowInstrMapTy RowInstrMap; // KeyInstrVec - list of key instructions. 
- std::vector<Record *> KeyInstrVec; - DenseMap<const Record *, std::vector<Record *>> MapTable; + std::vector<const Record *> KeyInstrVec; + DenseMap<const Record *, std::vector<const Record *>> MapTable; public: - MapTableEmitter(CodeGenTarget &Target, RecordKeeper &Records, Record *IMRec) + MapTableEmitter(const CodeGenTarget &Target, const RecordKeeper &Records, + const Record *IMRec) : Target(Target), InstrMapDesc(IMRec) { const std::string &FilterClass = InstrMapDesc.getFilterClass(); InstrDefs = Records.getAllDerivedDefinitions(FilterClass); @@ -198,11 +195,12 @@ public: // Returns true if an instruction is a key instruction, i.e., its ColFields // have same values as KeyCol. - bool isKeyColInstr(Record *CurInstr); + bool isKeyColInstr(const Record *CurInstr); // Find column instruction corresponding to a key instruction based on the // constraints for that column. - Record *getInstrForColumn(Record *KeyInstr, ListInit *CurValueCol); + const Record *getInstrForColumn(const Record *KeyInstr, + const ListInit *CurValueCol); // Find column instructions for each key instruction based // on ValueCols and store them into MapTable. 
@@ -226,17 +224,17 @@ public: //===----------------------------------------------------------------------===// void MapTableEmitter::buildRowInstrMap() { - for (Record *CurInstr : InstrDefs) { - std::vector<Init *> KeyValue; - ListInit *RowFields = InstrMapDesc.getRowFields(); - for (Init *RowField : RowFields->getValues()) { - RecordVal *RecVal = CurInstr->getValue(RowField); + for (const Record *CurInstr : InstrDefs) { + std::vector<const Init *> KeyValue; + const ListInit *RowFields = InstrMapDesc.getRowFields(); + for (const Init *RowField : RowFields->getValues()) { + const RecordVal *RecVal = CurInstr->getValue(RowField); if (RecVal == nullptr) PrintFatalError(CurInstr->getLoc(), "No value " + RowField->getAsString() + " found in \"" + CurInstr->getName() + "\" instruction description."); - Init *CurInstrVal = RecVal->getValue(); + const Init *CurInstrVal = RecVal->getValue(); KeyValue.push_back(CurInstrVal); } @@ -254,18 +252,19 @@ void MapTableEmitter::buildRowInstrMap() { // Return true if an instruction is a KeyCol instruction. //===----------------------------------------------------------------------===// -bool MapTableEmitter::isKeyColInstr(Record *CurInstr) { - ListInit *ColFields = InstrMapDesc.getColFields(); - ListInit *KeyCol = InstrMapDesc.getKeyCol(); +bool MapTableEmitter::isKeyColInstr(const Record *CurInstr) { + const ListInit *ColFields = InstrMapDesc.getColFields(); + const ListInit *KeyCol = InstrMapDesc.getKeyCol(); // Check if the instruction is a KeyCol instruction. 
bool MatchFound = true; for (unsigned j = 0, endCF = ColFields->size(); (j < endCF) && MatchFound; j++) { - RecordVal *ColFieldName = CurInstr->getValue(ColFields->getElement(j)); + const RecordVal *ColFieldName = + CurInstr->getValue(ColFields->getElement(j)); std::string CurInstrVal = ColFieldName->getValue()->getAsUnquotedString(); std::string KeyColValue = KeyCol->getElement(j)->getAsUnquotedString(); - MatchFound = (CurInstrVal == KeyColValue); + MatchFound = CurInstrVal == KeyColValue; } return MatchFound; } @@ -278,15 +277,15 @@ bool MapTableEmitter::isKeyColInstr(Record *CurInstr) { void MapTableEmitter::buildMapTable() { // Find column instructions for a given key based on the ColField // constraints. - const std::vector<ListInit *> &ValueCols = InstrMapDesc.getValueCols(); + ArrayRef<const ListInit *> ValueCols = InstrMapDesc.getValueCols(); unsigned NumOfCols = ValueCols.size(); - for (Record *CurKeyInstr : KeyInstrVec) { - std::vector<Record *> ColInstrVec(NumOfCols); + for (const Record *CurKeyInstr : KeyInstrVec) { + std::vector<const Record *> ColInstrVec(NumOfCols); // Find the column instruction based on the constraints for the column. for (unsigned ColIdx = 0; ColIdx < NumOfCols; ColIdx++) { - ListInit *CurValueCol = ValueCols[ColIdx]; - Record *ColInstr = getInstrForColumn(CurKeyInstr, CurValueCol); + const ListInit *CurValueCol = ValueCols[ColIdx]; + const Record *ColInstr = getInstrForColumn(CurKeyInstr, CurValueCol); ColInstrVec[ColIdx] = ColInstr; } MapTable[CurKeyInstr] = ColInstrVec; @@ -297,14 +296,14 @@ void MapTableEmitter::buildMapTable() { // Find column instruction based on the constraints for that column. 
//===----------------------------------------------------------------------===// -Record *MapTableEmitter::getInstrForColumn(Record *KeyInstr, - ListInit *CurValueCol) { - ListInit *RowFields = InstrMapDesc.getRowFields(); - std::vector<Init *> KeyValue; +const Record *MapTableEmitter::getInstrForColumn(const Record *KeyInstr, + const ListInit *CurValueCol) { + const ListInit *RowFields = InstrMapDesc.getRowFields(); + std::vector<const Init *> KeyValue; // Construct KeyValue using KeyInstr's values for RowFields. - for (Init *RowField : RowFields->getValues()) { - Init *KeyInstrVal = KeyInstr->getValue(RowField)->getValue(); + for (const Init *RowField : RowFields->getValues()) { + const Init *KeyInstrVal = KeyInstr->getValue(RowField)->getValue(); KeyValue.push_back(KeyInstrVal); } @@ -312,20 +311,20 @@ Record *MapTableEmitter::getInstrForColumn(Record *KeyInstr, // in RowInstrMap. We search through these instructions to find a match // for the current column, i.e., the instruction which has the same values // as CurValueCol for all the fields in ColFields. 
- const std::vector<Record *> &RelatedInstrVec = RowInstrMap[KeyValue]; + ArrayRef<const Record *> RelatedInstrVec = RowInstrMap[KeyValue]; - ListInit *ColFields = InstrMapDesc.getColFields(); - Record *MatchInstr = nullptr; + const ListInit *ColFields = InstrMapDesc.getColFields(); + const Record *MatchInstr = nullptr; - for (llvm::Record *CurInstr : RelatedInstrVec) { + for (const Record *CurInstr : RelatedInstrVec) { bool MatchFound = true; for (unsigned j = 0, endCF = ColFields->size(); (j < endCF) && MatchFound; j++) { - Init *ColFieldJ = ColFields->getElement(j); - Init *CurInstrInit = CurInstr->getValue(ColFieldJ)->getValue(); + const Init *ColFieldJ = ColFields->getElement(j); + const Init *CurInstrInit = CurInstr->getValue(ColFieldJ)->getValue(); std::string CurInstrVal = CurInstrInit->getAsUnquotedString(); - Init *ColFieldJVallue = CurValueCol->getElement(j); - MatchFound = (CurInstrVal == ColFieldJVallue->getAsUnquotedString()); + const Init *ColFieldJVallue = CurValueCol->getElement(j); + MatchFound = CurInstrVal == ColFieldJVallue->getAsUnquotedString(); } if (MatchFound) { @@ -333,7 +332,7 @@ Record *MapTableEmitter::getInstrForColumn(Record *KeyInstr, // Already had a match // Error if multiple matches are found for a column. 
std::string KeyValueStr; - for (Init *Value : KeyValue) { + for (const Init *Value : KeyValue) { if (!KeyValueStr.empty()) KeyValueStr += ", "; KeyValueStr += Value->getAsString(); @@ -357,11 +356,10 @@ Record *MapTableEmitter::getInstrForColumn(Record *KeyInstr, //===----------------------------------------------------------------------===// unsigned MapTableEmitter::emitBinSearchTable(raw_ostream &OS) { - ArrayRef<const CodeGenInstruction *> NumberedInstructions = Target.getInstructionsByEnumValue(); StringRef Namespace = Target.getInstNamespace(); - const std::vector<ListInit *> &ValueCols = InstrMapDesc.getValueCols(); + ArrayRef<const ListInit *> ValueCols = InstrMapDesc.getValueCols(); unsigned NumCol = ValueCols.size(); unsigned TotalNumInstr = NumberedInstructions.size(); unsigned TableSize = 0; @@ -372,7 +370,7 @@ unsigned MapTableEmitter::emitBinSearchTable(raw_ostream &OS) { OS << "Table[][" << NumCol + 1 << "] = {\n"; for (unsigned i = 0; i < TotalNumInstr; i++) { const Record *CurInstr = NumberedInstructions[i]->TheDef; - std::vector<Record *> ColInstrs = MapTable[CurInstr]; + ArrayRef<const Record *> ColInstrs = MapTable[CurInstr]; std::string OutStr; unsigned RelExists = 0; if (!ColInstrs.empty()) { @@ -434,8 +432,8 @@ void MapTableEmitter::emitBinSearch(raw_ostream &OS, unsigned TableSize) { void MapTableEmitter::emitMapFuncBody(raw_ostream &OS, unsigned TableSize) { - ListInit *ColFields = InstrMapDesc.getColFields(); - const std::vector<ListInit *> &ValueCols = InstrMapDesc.getValueCols(); + const ListInit *ColFields = InstrMapDesc.getColFields(); + ArrayRef<const ListInit *> ValueCols = InstrMapDesc.getValueCols(); // Emit binary search algorithm to locate instructions in the // relation table. 
If found, return opcode value from the appropriate column @@ -444,7 +442,7 @@ void MapTableEmitter::emitMapFuncBody(raw_ostream &OS, unsigned TableSize) { if (ValueCols.size() > 1) { for (unsigned i = 0, e = ValueCols.size(); i < e; i++) { - ListInit *ColumnI = ValueCols[i]; + const ListInit *ColumnI = ValueCols[i]; OS << " if ("; for (unsigned j = 0, ColSize = ColumnI->size(); j < ColSize; ++j) { std::string ColName = ColFields->getElement(j)->getAsUnquotedString(); @@ -476,8 +474,8 @@ void MapTableEmitter::emitTablesWithFunc(raw_ostream &OS) { // since first column is used for the key instructions), then we also need // to pass another input to indicate the column to be selected. - ListInit *ColFields = InstrMapDesc.getColFields(); - const std::vector<ListInit *> &ValueCols = InstrMapDesc.getValueCols(); + const ListInit *ColFields = InstrMapDesc.getColFields(); + ArrayRef<const ListInit *> ValueCols = InstrMapDesc.getValueCols(); OS << "// " << InstrMapDesc.getName() << "\nLLVM_READONLY\n"; OS << "int " << InstrMapDesc.getName() << "(uint16_t Opcode"; if (ValueCols.size() > 1) { @@ -499,23 +497,20 @@ void MapTableEmitter::emitTablesWithFunc(raw_ostream &OS) { // Emit enums for the column fields across all the instruction maps. //===----------------------------------------------------------------------===// -static void emitEnums(raw_ostream &OS, RecordKeeper &Records) { - - std::vector<Record *> InstrMapVec; - InstrMapVec = Records.getAllDerivedDefinitions("InstrMapping"); - std::map<std::string, std::vector<Init *>> ColFieldValueMap; +static void emitEnums(raw_ostream &OS, const RecordKeeper &Records) { + std::map<std::string, std::vector<const Init *>> ColFieldValueMap; // Iterate over all InstrMapping records and create a map between column // fields and their possible values across all records. 
- for (Record *CurMap : InstrMapVec) { - ListInit *ColFields; - ColFields = CurMap->getValueAsListInit("ColFields"); - ListInit *List = CurMap->getValueAsListInit("ValueCols"); - std::vector<ListInit *> ValueCols; + for (const Record *CurMap : + Records.getAllDerivedDefinitions("InstrMapping")) { + const ListInit *ColFields = CurMap->getValueAsListInit("ColFields"); + const ListInit *List = CurMap->getValueAsListInit("ValueCols"); + std::vector<const ListInit *> ValueCols; unsigned ListSize = List->size(); for (unsigned j = 0; j < ListSize; j++) { - auto *ListJ = cast<ListInit>(List->getElement(j)); + const auto *ListJ = cast<ListInit>(List->getElement(j)); if (ListJ->size() != ColFields->size()) PrintFatalError("Record `" + CurMap->getName() + @@ -533,12 +528,10 @@ static void emitEnums(raw_ostream &OS, RecordKeeper &Records) { } } - for (auto &Entry : ColFieldValueMap) { - std::vector<Init *> FieldValues = Entry.second; - + for (auto &[EnumName, FieldValues] : ColFieldValueMap) { // Delete duplicate entries from ColFieldValueMap for (unsigned i = 0; i < FieldValues.size() - 1; i++) { - Init *CurVal = FieldValues[i]; + const Init *CurVal = FieldValues[i]; for (unsigned j = i + 1; j < FieldValues.size(); j++) { if (CurVal == FieldValues[j]) { FieldValues.erase(FieldValues.begin() + j); @@ -548,28 +541,24 @@ static void emitEnums(raw_ostream &OS, RecordKeeper &Records) { } // Emit enumerated values for the column fields. 
- OS << "enum " << Entry.first << " {\n"; - for (unsigned i = 0, endFV = FieldValues.size(); i < endFV; i++) { - OS << "\t" << Entry.first << "_" << FieldValues[i]->getAsUnquotedString(); - if (i != endFV - 1) - OS << ",\n"; - else - OS << "\n};\n\n"; - } + OS << "enum " << EnumName << " {\n"; + ListSeparator LS(",\n"); + for (const Init *Field : FieldValues) + OS << LS << "\t" << EnumName << "_" << Field->getAsUnquotedString(); + OS << "\n};\n\n"; } } -namespace llvm { //===----------------------------------------------------------------------===// // Parse 'InstrMapping' records and use the information to form relationship // between instructions. These relations are emitted as a tables along with the // functions to query them. //===----------------------------------------------------------------------===// -void EmitMapTable(RecordKeeper &Records, raw_ostream &OS) { +void llvm::EmitMapTable(const RecordKeeper &Records, raw_ostream &OS) { CodeGenTarget Target(Records); StringRef NameSpace = Target.getInstNamespace(); - std::vector<Record *> InstrMapVec; - InstrMapVec = Records.getAllDerivedDefinitions("InstrMapping"); + ArrayRef<const Record *> InstrMapVec = + Records.getAllDerivedDefinitions("InstrMapping"); if (InstrMapVec.empty()) return; @@ -585,7 +574,7 @@ void EmitMapTable(RecordKeeper &Records, raw_ostream &OS) { // Iterate over all instruction mapping records and construct relationship // maps based on the information specified there. 
// - for (Record *CurMap : InstrMapVec) { + for (const Record *CurMap : InstrMapVec) { MapTableEmitter IMap(Target, Records, CurMap); // Build RowInstrMap to group instructions based on their values for @@ -604,5 +593,3 @@ void EmitMapTable(RecordKeeper &Records, raw_ostream &OS) { OS << "} // end namespace llvm\n"; OS << "#endif // GET_INSTRMAP_INFO\n\n"; } - -} // namespace llvm diff --git a/llvm/utils/TableGen/Common/CodeGenSchedule.cpp b/llvm/utils/TableGen/Common/CodeGenSchedule.cpp index 33d1da2f848b..de2cb67b1f1d 100644 --- a/llvm/utils/TableGen/Common/CodeGenSchedule.cpp +++ b/llvm/utils/TableGen/Common/CodeGenSchedule.cpp @@ -271,8 +271,7 @@ void CodeGenSchedModels::checkSTIPredicates() const { // Disallow InstructionEquivalenceClasses with an empty instruction list. for (const Record *R : Records.getAllDerivedDefinitions("InstructionEquivalenceClass")) { - RecVec Opcodes = R->getValueAsListOfDefs("Opcodes"); - if (Opcodes.empty()) { + if (R->getValueAsListOfDefs("Opcodes").empty()) { PrintFatalError(R->getLoc(), "Invalid InstructionEquivalenceClass " "defined with an empty opcode list."); } @@ -311,13 +310,13 @@ static void processSTIPredicate(STIPredicateFunction &Fn, // definitions. Each unique opcode will be associated with an OpcodeInfo // object. 
for (const Record *Def : Fn.getDefinitions()) { - RecVec Classes = Def->getValueAsListOfDefs("Classes"); + ConstRecVec Classes = Def->getValueAsListOfConstDefs("Classes"); for (const Record *EC : Classes) { const Record *Pred = EC->getValueAsDef("Predicate"); if (!Predicate2Index.contains(Pred)) Predicate2Index[Pred] = NumUniquePredicates++; - RecVec Opcodes = EC->getValueAsListOfDefs("Opcodes"); + ConstRecVec Opcodes = EC->getValueAsListOfConstDefs("Opcodes"); for (const Record *Opcode : Opcodes) { if (!Opcode2Index.contains(Opcode)) { Opcode2Index[Opcode] = OpcodeMappings.size(); @@ -342,14 +341,14 @@ static void processSTIPredicate(STIPredicateFunction &Fn, // Construct a OpcodeInfo object for every unique opcode declared by an // InstructionEquivalenceClass definition. for (const Record *Def : Fn.getDefinitions()) { - RecVec Classes = Def->getValueAsListOfDefs("Classes"); + ConstRecVec Classes = Def->getValueAsListOfConstDefs("Classes"); const Record *SchedModel = Def->getValueAsDef("SchedModel"); unsigned ProcIndex = ProcModelMap.find(SchedModel)->second; APInt ProcMask(ProcModelMap.size(), 0); ProcMask.setBit(ProcIndex); for (const Record *EC : Classes) { - RecVec Opcodes = EC->getValueAsListOfDefs("Opcodes"); + ConstRecVec Opcodes = EC->getValueAsListOfConstDefs("Opcodes"); std::vector<int64_t> OpIndices = EC->getValueAsListOfInts("OperandIndices"); @@ -579,8 +578,7 @@ static void scanSchedRW(const Record *RWDef, ConstRecVec &RWDefs, // Visit each variant (guarded by a different predicate). for (const Record *Variant : RWDef->getValueAsListOfDefs("Variants")) { // Visit each RW in the sequence selected by the current variant. 
- RecVec Selected = Variant->getValueAsListOfDefs("Selected"); - for (Record *SelDef : Selected) + for (const Record *SelDef : Variant->getValueAsListOfDefs("Selected")) scanSchedRW(SelDef, RWDefs, RWSet); } } @@ -601,8 +599,7 @@ void CodeGenSchedModels::collectSchedRW() { const Record *SchedDef = Inst->TheDef; if (SchedDef->isValueUnset("SchedRW")) continue; - RecVec RWs = SchedDef->getValueAsListOfDefs("SchedRW"); - for (Record *RW : RWs) { + for (const Record *RW : SchedDef->getValueAsListOfDefs("SchedRW")) { if (RW->isSubClassOf("SchedWrite")) scanSchedRW(RW, SWDefs, RWSet); else { @@ -614,8 +611,8 @@ void CodeGenSchedModels::collectSchedRW() { // Find all ReadWrites referenced by InstRW. for (const Record *InstRWDef : Records.getAllDerivedDefinitions("InstRW")) { // For all OperandReadWrites. - RecVec RWDefs = InstRWDef->getValueAsListOfDefs("OperandReadWrites"); - for (Record *RWDef : RWDefs) { + for (const Record *RWDef : + InstRWDef->getValueAsListOfDefs("OperandReadWrites")) { if (RWDef->isSubClassOf("SchedWrite")) scanSchedRW(RWDef, SWDefs, RWSet); else { @@ -627,8 +624,8 @@ void CodeGenSchedModels::collectSchedRW() { // Find all ReadWrites referenced by ItinRW. for (const Record *ItinRWDef : Records.getAllDerivedDefinitions("ItinRW")) { // For all OperandReadWrites. - RecVec RWDefs = ItinRWDef->getValueAsListOfDefs("OperandReadWrites"); - for (Record *RWDef : RWDefs) { + for (const Record *RWDef : + ItinRWDef->getValueAsListOfDefs("OperandReadWrites")) { if (RWDef->isSubClassOf("SchedWrite")) scanSchedRW(RWDef, SWDefs, RWSet); else { @@ -672,7 +669,7 @@ void CodeGenSchedModels::collectSchedRW() { for (CodeGenSchedRW &CGRW : SchedWrites) { if (!CGRW.IsSequence) continue; - findRWs(CGRW.TheDef->getValueAsListOfDefs("Writes"), CGRW.Sequence, + findRWs(CGRW.TheDef->getValueAsListOfConstDefs("Writes"), CGRW.Sequence, /*IsRead=*/false); } // Initialize Aliases vectors. 
@@ -726,9 +723,10 @@ unsigned CodeGenSchedModels::getSchedRWIdx(const Record *Def, return I == RWVec.end() ? 0 : std::distance(RWVec.begin(), I); } -static void splitSchedReadWrites(const RecVec &RWDefs, RecVec &WriteDefs, - RecVec &ReadDefs) { - for (Record *RWDef : RWDefs) { +static void splitSchedReadWrites(const ConstRecVec &RWDefs, + ConstRecVec &WriteDefs, + ConstRecVec &ReadDefs) { + for (const Record *RWDef : RWDefs) { if (RWDef->isSubClassOf("SchedWrite")) WriteDefs.push_back(RWDef); else { @@ -739,19 +737,19 @@ static void splitSchedReadWrites(const RecVec &RWDefs, RecVec &WriteDefs, } // Split the SchedReadWrites defs and call findRWs for each list. -void CodeGenSchedModels::findRWs(const RecVec &RWDefs, IdxVec &Writes, +void CodeGenSchedModels::findRWs(const ConstRecVec &RWDefs, IdxVec &Writes, IdxVec &Reads) const { - RecVec WriteDefs; - RecVec ReadDefs; + ConstRecVec WriteDefs; + ConstRecVec ReadDefs; splitSchedReadWrites(RWDefs, WriteDefs, ReadDefs); findRWs(WriteDefs, Writes, false); findRWs(ReadDefs, Reads, true); } // Call getSchedRWIdx for all elements in a sequence of SchedRW defs. -void CodeGenSchedModels::findRWs(const RecVec &RWDefs, IdxVec &RWs, +void CodeGenSchedModels::findRWs(const ConstRecVec &RWDefs, IdxVec &RWs, bool IsRead) const { - for (Record *RWDef : RWDefs) { + for (const Record *RWDef : RWDefs) { unsigned Idx = getSchedRWIdx(RWDef, IsRead); assert(Idx && "failed to collect SchedReadWrite"); RWs.push_back(Idx); @@ -859,7 +857,8 @@ void CodeGenSchedModels::collectSchedClasses() { Record *ItinDef = Inst->TheDef->getValueAsDef("Itinerary"); IdxVec Writes, Reads; if (!Inst->TheDef->isValueUnset("SchedRW")) - findRWs(Inst->TheDef->getValueAsListOfDefs("SchedRW"), Writes, Reads); + findRWs(Inst->TheDef->getValueAsListOfConstDefs("SchedRW"), Writes, + Reads); // ProcIdx == 0 indicates the class applies to all processors. 
unsigned SCIdx = addSchedClass(ItinDef, Writes, Reads, /*ProcIndices*/ {0}); @@ -921,7 +920,8 @@ void CodeGenSchedModels::collectSchedClasses() { << InstName); IdxVec Writes; IdxVec Reads; - findRWs(RWDef->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads); + findRWs(RWDef->getValueAsListOfConstDefs("OperandReadWrites"), Writes, + Reads); LLVM_DEBUG({ for (unsigned WIdx : Writes) dbgs() << " " << SchedWrites[WIdx].Name; @@ -950,10 +950,9 @@ CodeGenSchedModels::getSchedClassIdx(const CodeGenInstruction &Inst) const { } std::string -CodeGenSchedModels::createSchedClassName(Record *ItinClassDef, +CodeGenSchedModels::createSchedClassName(const Record *ItinClassDef, ArrayRef<unsigned> OperWrites, ArrayRef<unsigned> OperReads) { - std::string Name; if (ItinClassDef && ItinClassDef->getName() != "NoItinerary") Name = std::string(ItinClassDef->getName()); @@ -983,7 +982,7 @@ CodeGenSchedModels::createSchedClassName(const ConstRecVec &InstDefs) { /// Add an inferred sched class from an itinerary class and per-operand list of /// SchedWrites and SchedReads. ProcIndices contains the set of IDs of /// processors that may utilize this class. -unsigned CodeGenSchedModels::addSchedClass(Record *ItinClassDef, +unsigned CodeGenSchedModels::addSchedClass(const Record *ItinClassDef, ArrayRef<unsigned> OperWrites, ArrayRef<unsigned> OperReads, ArrayRef<unsigned> ProcIndices) { @@ -1131,7 +1130,8 @@ void CodeGenSchedModels::collectProcItins() { if (!ProcModel.hasItineraries()) continue; - RecVec ItinRecords = ProcModel.ItinsDef->getValueAsListOfDefs("IID"); + ConstRecVec ItinRecords = + ProcModel.ItinsDef->getValueAsListOfConstDefs("IID"); assert(!ItinRecords.empty() && "ProcModel.hasItineraries is incorrect"); // Populate ItinDefList with Itinerary records. @@ -1139,7 +1139,7 @@ void CodeGenSchedModels::collectProcItins() { // Insert each itinerary data record in the correct position within // the processor model's ItinDefList. 
- for (Record *ItinData : ItinRecords) { + for (const Record *ItinData : ItinRecords) { const Record *ItinDef = ItinData->getValueAsDef("TheClass"); bool FoundClass = false; @@ -1217,14 +1217,15 @@ void CodeGenSchedModels::inferSchedClasses() { } /// Infer classes from per-processor itinerary resources. -void CodeGenSchedModels::inferFromItinClass(Record *ItinClassDef, +void CodeGenSchedModels::inferFromItinClass(const Record *ItinClassDef, unsigned FromClassIdx) { for (unsigned PIdx = 0, PEnd = ProcModels.size(); PIdx != PEnd; ++PIdx) { const CodeGenProcModel &PM = ProcModels[PIdx]; // For all ItinRW entries. bool HasMatch = false; for (const Record *Rec : PM.ItinRWDefs) { - RecVec Matched = Rec->getValueAsListOfDefs("MatchedItinClasses"); + ConstRecVec Matched = + Rec->getValueAsListOfConstDefs("MatchedItinClasses"); if (!llvm::is_contained(Matched, ItinClassDef)) continue; if (HasMatch) @@ -1233,7 +1234,8 @@ void CodeGenSchedModels::inferFromItinClass(Record *ItinClassDef, " in ItinResources for " + PM.ModelName); HasMatch = true; IdxVec Writes, Reads; - findRWs(Rec->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads); + findRWs(Rec->getValueAsListOfConstDefs("OperandReadWrites"), Writes, + Reads); inferFromRW(Writes, Reads, FromClassIdx, PIdx); } } @@ -1255,7 +1257,7 @@ void CodeGenSchedModels::inferFromInstRWs(unsigned SCIdx) { if (II == IE) continue; IdxVec Writes, Reads; - findRWs(Rec->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads); + findRWs(Rec->getValueAsListOfConstDefs("OperandReadWrites"), Writes, Reads); unsigned PIdx = getProcModel(Rec->getValueAsDef("SchedModel")).Index; inferFromRW(Writes, Reads, SCIdx, PIdx); // May mutate SchedClasses. 
SchedClasses[SCIdx].InstRWProcIndices.insert(PIdx); @@ -1348,7 +1350,8 @@ bool PredTransitions::mutuallyExclusive(Record *PredDef, const CodeGenSchedRW &SchedRW = SchedModels.getSchedRW(PC.RWIdx, PC.IsRead); assert(SchedRW.HasVariants && "PredCheck must refer to a SchedVariant"); - RecVec Variants = SchedRW.TheDef->getValueAsListOfDefs("Variants"); + ConstRecVec Variants = + SchedRW.TheDef->getValueAsListOfConstDefs("Variants"); if (any_of(Variants, [PredDef](const Record *R) { return R->getValueAsDef("Predicate") == PredDef; })) { @@ -1414,8 +1417,8 @@ void PredTransitions::getIntersectingVariants( } if (VarProcIdx == 0 || VarProcIdx == TransVec[TransIdx].ProcIndex) { // Push each variant. Assign TransVecIdx later. - const RecVec VarDefs = SchedRW.TheDef->getValueAsListOfDefs("Variants"); - for (Record *VarDef : VarDefs) + for (const Record *VarDef : + SchedRW.TheDef->getValueAsListOfDefs("Variants")) Variants.emplace_back(VarDef, SchedRW.Index, VarProcIdx, 0); if (VarProcIdx == 0) GenericRW = true; @@ -1446,8 +1449,7 @@ void PredTransitions::getIntersectingVariants( SchedModels.getSchedRW((*AI)->getValueAsDef("AliasRW")); if (AliasRW.HasVariants) { - const RecVec VarDefs = AliasRW.TheDef->getValueAsListOfDefs("Variants"); - for (Record *VD : VarDefs) + for (const Record *VD : AliasRW.TheDef->getValueAsListOfDefs("Variants")) Variants.emplace_back(VD, AliasRW.Index, AliasProcIdx, 0); } if (AliasRW.IsSequence) @@ -1495,7 +1497,8 @@ void PredTransitions::pushVariant(const TransVariant &VInfo, bool IsRead) { if (VInfo.VarOrSeqDef->isSubClassOf("SchedVar")) { Record *PredDef = VInfo.VarOrSeqDef->getValueAsDef("Predicate"); Trans.PredTerm.emplace_back(IsRead, VInfo.RWIdx, PredDef); - RecVec SelectedDefs = VInfo.VarOrSeqDef->getValueAsListOfDefs("Selected"); + ConstRecVec SelectedDefs = + VInfo.VarOrSeqDef->getValueAsListOfConstDefs("Selected"); SchedModels.findRWs(SelectedDefs, SelectedRWs, IsRead); } else { assert(VInfo.VarOrSeqDef->isSubClassOf("WriteSequence") && @@ 
-1761,12 +1764,14 @@ void CodeGenSchedModels::inferFromRW(ArrayRef<unsigned> OperWrites, // Check if any processor resource group contains all resource records in // SubUnits. -bool CodeGenSchedModels::hasSuperGroup(RecVec &SubUnits, CodeGenProcModel &PM) { +bool CodeGenSchedModels::hasSuperGroup(ConstRecVec &SubUnits, + CodeGenProcModel &PM) { for (const Record *ProcResourceDef : PM.ProcResourceDefs) { if (!ProcResourceDef->isSubClassOf("ProcResGroup")) continue; - RecVec SuperUnits = ProcResourceDef->getValueAsListOfDefs("Resources"); - RecIter RI = SubUnits.begin(), RE = SubUnits.end(); + ConstRecVec SuperUnits = + ProcResourceDef->getValueAsListOfConstDefs("Resources"); + auto RI = SubUnits.begin(), RE = SubUnits.end(); for (; RI != RE; ++RI) { if (!is_contained(SuperUnits, *RI)) { break; @@ -1783,13 +1788,13 @@ void CodeGenSchedModels::verifyProcResourceGroups(CodeGenProcModel &PM) { for (unsigned i = 0, e = PM.ProcResourceDefs.size(); i < e; ++i) { if (!PM.ProcResourceDefs[i]->isSubClassOf("ProcResGroup")) continue; - RecVec CheckUnits = - PM.ProcResourceDefs[i]->getValueAsListOfDefs("Resources"); + ConstRecVec CheckUnits = + PM.ProcResourceDefs[i]->getValueAsListOfConstDefs("Resources"); for (unsigned j = i + 1; j < e; ++j) { if (!PM.ProcResourceDefs[j]->isSubClassOf("ProcResGroup")) continue; - RecVec OtherUnits = - PM.ProcResourceDefs[j]->getValueAsListOfDefs("Resources"); + ConstRecVec OtherUnits = + PM.ProcResourceDefs[j]->getValueAsListOfConstDefs("Resources"); if (std::find_first_of(CheckUnits.begin(), CheckUnits.end(), OtherUnits.begin(), OtherUnits.end()) != CheckUnits.end()) { @@ -1828,7 +1833,7 @@ void CodeGenSchedModels::collectRegisterFiles() { "Invalid RegisterFile with zero physical registers"); } - RecVec RegisterClasses = RF->getValueAsListOfDefs("RegClasses"); + ConstRecVec RegisterClasses = RF->getValueAsListOfConstDefs("RegClasses"); std::vector<int64_t> RegisterCosts = RF->getValueAsListOfInts("RegCosts"); ListInit *MoveElimInfo = 
RF->getValueAsListInit("AllowMoveElimination"); for (unsigned I = 0, E = RegisterClasses.size(); I < E; ++I) { @@ -1866,7 +1871,8 @@ void CodeGenSchedModels::collectProcResources() { Record *RWModelDef = RW->getValueAsDef("SchedModel"); unsigned PIdx = getProcModel(RWModelDef).Index; IdxVec Writes, Reads; - findRWs(RW->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads); + findRWs(RW->getValueAsListOfConstDefs("OperandReadWrites"), Writes, + Reads); collectRWResources(Writes, Reads, PIdx); } @@ -2004,13 +2010,13 @@ void CodeGenSchedModels::checkCompleteness() { } // Collect itinerary class resources for each processor. -void CodeGenSchedModels::collectItinProcResources(Record *ItinClassDef) { +void CodeGenSchedModels::collectItinProcResources(const Record *ItinClassDef) { for (unsigned PIdx = 0, PEnd = ProcModels.size(); PIdx != PEnd; ++PIdx) { const CodeGenProcModel &PM = ProcModels[PIdx]; // For all ItinRW entries. bool HasMatch = false; for (const Record *R : PM.ItinRWDefs) { - RecVec Matched = R->getValueAsListOfDefs("MatchedItinClasses"); + ConstRecVec Matched = R->getValueAsListOfConstDefs("MatchedItinClasses"); if (!llvm::is_contained(Matched, ItinClassDef)) continue; if (HasMatch) @@ -2019,7 +2025,7 @@ void CodeGenSchedModels::collectItinProcResources(Record *ItinClassDef) { " in ItinResources for " + PM.ModelName); HasMatch = true; IdxVec Writes, Reads; - findRWs(R->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads); + findRWs(R->getValueAsListOfConstDefs("OperandReadWrites"), Writes, Reads); collectRWResources(Writes, Reads, PIdx); } } @@ -2139,8 +2145,8 @@ void CodeGenSchedModels::addWriteRes(const Record *ProcWriteResDef, WRDefs.push_back(ProcWriteResDef); // Visit ProcResourceKinds referenced by the newly discovered WriteRes. 
- RecVec ProcResDefs = ProcWriteResDef->getValueAsListOfDefs("ProcResources"); - for (const Record *ProcResDef : ProcResDefs) { + for (const Record *ProcResDef : + ProcWriteResDef->getValueAsListOfDefs("ProcResources")) { addProcResource(ProcResDef, ProcModels[PIdx], ProcWriteResDef->getLoc()); } } @@ -2186,7 +2192,7 @@ bool CodeGenProcModel::isUnsupported(const CodeGenInstruction &Inst) const { bool CodeGenProcModel::hasReadOfWrite(const Record *WriteDef) const { for (auto &RADef : ReadAdvanceDefs) { - RecVec ValidWrites = RADef->getValueAsListOfDefs("ValidWrites"); + ConstRecVec ValidWrites = RADef->getValueAsListOfConstDefs("ValidWrites"); if (is_contained(ValidWrites, WriteDef)) return true; } diff --git a/llvm/utils/TableGen/Common/CodeGenSchedule.h b/llvm/utils/TableGen/Common/CodeGenSchedule.h index ff85ac396859..f43c856b274c 100644 --- a/llvm/utils/TableGen/Common/CodeGenSchedule.h +++ b/llvm/utils/TableGen/Common/CodeGenSchedule.h @@ -33,8 +33,6 @@ class CodeGenTarget; class CodeGenSchedModels; class CodeGenInstruction; -using RecVec = std::vector<Record *>; -using RecIter = RecVec::const_iterator; using ConstRecVec = std::vector<const Record *>; using ConstRecIter = ConstRecVec::const_iterator; @@ -132,7 +130,7 @@ struct CodeGenSchedTransition { struct CodeGenSchedClass { unsigned Index; std::string Name; - Record *ItinClassDef; + const Record *ItinClassDef; IdxVec Writes; IdxVec Reads; @@ -149,10 +147,11 @@ struct CodeGenSchedClass { // InstRWs processor indices. 
Filled in inferFromInstRWs DenseSet<unsigned> InstRWProcIndices; - CodeGenSchedClass(unsigned Index, std::string Name, Record *ItinClassDef) + CodeGenSchedClass(unsigned Index, std::string Name, + const Record *ItinClassDef) : Index(Index), Name(std::move(Name)), ItinClassDef(ItinClassDef) {} - bool isKeyEqual(Record *IC, ArrayRef<unsigned> W, + bool isKeyEqual(const Record *IC, ArrayRef<unsigned> W, ArrayRef<unsigned> R) const { return ItinClassDef == IC && ArrayRef(Writes) == W && ArrayRef(Reads) == R; } @@ -172,10 +171,10 @@ struct CodeGenSchedClass { /// registers used by the register renamer. Register costs are defined at /// register class granularity. struct CodeGenRegisterCost { - Record *RCDef; + const Record *RCDef; unsigned Cost; bool AllowMoveElimination; - CodeGenRegisterCost(Record *RC, unsigned RegisterCost, + CodeGenRegisterCost(const Record *RC, unsigned RegisterCost, bool AllowMoveElim = false) : RCDef(RC), Cost(RegisterCost), AllowMoveElimination(AllowMoveElim) {} CodeGenRegisterCost(const CodeGenRegisterCost &) = default; @@ -231,7 +230,7 @@ struct CodeGenProcModel { // Array of InstrItinData records indexed by a CodeGenSchedClass index. // This list is empty if the Processor has no value for Itineraries. // Initialized by collectProcItins(). - RecVec ItinDefList; + ConstRecVec ItinDefList; // Map itinerary classes to per-operand resources. // This list is empty if no ItinRW refers to this Processor. @@ -239,7 +238,7 @@ struct CodeGenProcModel { // List of unsupported feature. // This list is empty if the Processor has no UnsupportedFeatures. - RecVec UnsupportedFeaturesDefs; + ConstRecVec UnsupportedFeaturesDefs; // All read/write resources associated with this processor. ConstRecVec WriteResDefs; @@ -530,13 +529,13 @@ public: const CodeGenSchedRW &getSchedRW(unsigned Idx, bool IsRead) const { return IsRead ? 
getSchedRead(Idx) : getSchedWrite(Idx); } - CodeGenSchedRW &getSchedRW(Record *Def) { + CodeGenSchedRW &getSchedRW(const Record *Def) { bool IsRead = Def->isSubClassOf("SchedRead"); unsigned Idx = getSchedRWIdx(Def, IsRead); return const_cast<CodeGenSchedRW &>(IsRead ? getSchedRead(Idx) : getSchedWrite(Idx)); } - const CodeGenSchedRW &getSchedRW(Record *Def) const { + const CodeGenSchedRW &getSchedRW(const Record *Def) const { return const_cast<CodeGenSchedModels &>(*this).getSchedRW(Def); } @@ -564,13 +563,13 @@ public: unsigned numInstrSchedClasses() const { return NumInstrSchedClasses; } - void findRWs(const RecVec &RWDefs, IdxVec &Writes, IdxVec &Reads) const; - void findRWs(const RecVec &RWDefs, IdxVec &RWs, bool IsRead) const; + void findRWs(const ConstRecVec &RWDefs, IdxVec &Writes, IdxVec &Reads) const; + void findRWs(const ConstRecVec &RWDefs, IdxVec &RWs, bool IsRead) const; void expandRWSequence(unsigned RWIdx, IdxVec &RWSeq, bool IsRead) const; void expandRWSeqForProc(unsigned RWIdx, IdxVec &RWSeq, bool IsRead, const CodeGenProcModel &ProcModel) const; - unsigned addSchedClass(Record *ItinDef, ArrayRef<unsigned> OperWrites, + unsigned addSchedClass(const Record *ItinDef, ArrayRef<unsigned> OperWrites, ArrayRef<unsigned> OperReads, ArrayRef<unsigned> ProcIndices); @@ -603,7 +602,7 @@ private: void collectOptionalProcessorInfo(); - std::string createSchedClassName(Record *ItinClassDef, + std::string createSchedClassName(const Record *ItinClassDef, ArrayRef<unsigned> OperWrites, ArrayRef<unsigned> OperReads); std::string createSchedClassName(const ConstRecVec &InstDefs); @@ -629,15 +628,15 @@ private: void inferFromRW(ArrayRef<unsigned> OperWrites, ArrayRef<unsigned> OperReads, unsigned FromClassIdx, ArrayRef<unsigned> ProcIndices); - void inferFromItinClass(Record *ItinClassDef, unsigned FromClassIdx); + void inferFromItinClass(const Record *ItinClassDef, unsigned FromClassIdx); void inferFromInstRWs(unsigned SCIdx); - bool hasSuperGroup(RecVec &SubUnits, 
CodeGenProcModel &PM); + bool hasSuperGroup(ConstRecVec &SubUnits, CodeGenProcModel &PM); void verifyProcResourceGroups(CodeGenProcModel &PM); void collectProcResources(); - void collectItinProcResources(Record *ItinClassDef); + void collectItinProcResources(const Record *ItinClassDef); void collectRWResources(unsigned RWIdx, bool IsRead, ArrayRef<unsigned> ProcIndices); diff --git a/llvm/utils/TableGen/CompressInstEmitter.cpp b/llvm/utils/TableGen/CompressInstEmitter.cpp index 06801e93f4f4..f46ceb517422 100644 --- a/llvm/utils/TableGen/CompressInstEmitter.cpp +++ b/llvm/utils/TableGen/CompressInstEmitter.cpp @@ -92,7 +92,7 @@ class CompressInstEmitter { // Integer immediate value. int64_t Imm; // Physical register. - Record *Reg; + const Record *Reg; } Data; // Tied operand index within the instruction. int TiedOpIdx = -1; @@ -103,7 +103,7 @@ class CompressInstEmitter { // The destination instruction to transform to. CodeGenInstruction Dest; // Required target features to enable pattern. - std::vector<Record *> PatReqFeatures; + std::vector<const Record *> PatReqFeatures; // Maps operands in the Source Instruction to // the corresponding Dest instruction operand. 
IndexedMap<OpData> SourceOperandMap; @@ -112,38 +112,40 @@ class CompressInstEmitter { IndexedMap<OpData> DestOperandMap; bool IsCompressOnly; - CompressPat(CodeGenInstruction &S, CodeGenInstruction &D, - std::vector<Record *> RF, IndexedMap<OpData> &SourceMap, + CompressPat(const CodeGenInstruction &S, const CodeGenInstruction &D, + std::vector<const Record *> RF, IndexedMap<OpData> &SourceMap, IndexedMap<OpData> &DestMap, bool IsCompressOnly) : Source(S), Dest(D), PatReqFeatures(RF), SourceOperandMap(SourceMap), DestOperandMap(DestMap), IsCompressOnly(IsCompressOnly) {} }; enum EmitterType { Compress, Uncompress, CheckCompress }; - RecordKeeper &Records; - CodeGenTarget Target; + const RecordKeeper &Records; + const CodeGenTarget Target; SmallVector<CompressPat, 4> CompressPatterns; - void addDagOperandMapping(Record *Rec, DagInit *Dag, CodeGenInstruction &Inst, + void addDagOperandMapping(const Record *Rec, const DagInit *Dag, + const CodeGenInstruction &Inst, IndexedMap<OpData> &OperandMap, bool IsSourceInst); - void evaluateCompressPat(Record *Compress); + void evaluateCompressPat(const Record *Compress); void emitCompressInstEmitter(raw_ostream &OS, EmitterType EType); bool validateTypes(const Record *DagOpType, const Record *InstOpType, bool IsSourceInst); bool validateRegister(const Record *Reg, const Record *RegClass); - void createDagOperandMapping(Record *Rec, StringMap<unsigned> &SourceOperands, + void createDagOperandMapping(const Record *Rec, + StringMap<unsigned> &SourceOperands, StringMap<unsigned> &DestOperands, - DagInit *SourceDag, DagInit *DestDag, + const DagInit *SourceDag, const DagInit *DestDag, IndexedMap<OpData> &SourceOperandMap); - void createInstOperandMapping(Record *Rec, DagInit *SourceDag, - DagInit *DestDag, + void createInstOperandMapping(const Record *Rec, const DagInit *SourceDag, + const DagInit *DestDag, IndexedMap<OpData> &SourceOperandMap, IndexedMap<OpData> &DestOperandMap, StringMap<unsigned> &SourceOperands, - 
CodeGenInstruction &DestInst); + const CodeGenInstruction &DestInst); public: - CompressInstEmitter(RecordKeeper &R) : Records(R), Target(R) {} + CompressInstEmitter(const RecordKeeper &R) : Records(R), Target(R) {} void run(raw_ostream &OS); }; @@ -156,7 +158,7 @@ bool CompressInstEmitter::validateRegister(const Record *Reg, "RegClass record should be a RegisterClass"); const CodeGenRegisterClass &RC = Target.getRegisterClass(RegClass); const CodeGenRegister *R = Target.getRegisterByName(Reg->getName().lower()); - assert((R != nullptr) && "Register not defined!!"); + assert(R != nullptr && "Register not defined!!"); return RC.contains(R); } @@ -199,8 +201,9 @@ bool CompressInstEmitter::validateTypes(const Record *DagOpType, /// operands and fixed registers it expects the Dag operand type to be contained /// in the instantiated instruction operand type. For immediate operands and /// immediates no validation checks are enforced at pattern validation time. -void CompressInstEmitter::addDagOperandMapping(Record *Rec, DagInit *Dag, - CodeGenInstruction &Inst, +void CompressInstEmitter::addDagOperandMapping(const Record *Rec, + const DagInit *Dag, + const CodeGenInstruction &Inst, IndexedMap<OpData> &OperandMap, bool IsSourceInst) { // TiedCount keeps track of the number of operands skipped in Inst @@ -218,7 +221,7 @@ void CompressInstEmitter::addDagOperandMapping(Record *Rec, DagInit *Dag, TiedCount++; continue; } - if (DefInit *DI = dyn_cast<DefInit>(Dag->getArg(I - TiedCount))) { + if (const DefInit *DI = dyn_cast<DefInit>(Dag->getArg(I - TiedCount))) { if (DI->getDef()->isSubClassOf("Register")) { // Check if the fixed register belongs to the Register class. if (!validateRegister(DI->getDef(), Inst.Operands[I].Rec)) @@ -267,7 +270,7 @@ void CompressInstEmitter::addDagOperandMapping(Record *Rec, DagInit *Dag, } // Verify the Dag operand count is enough to build an instruction. 
-static bool verifyDagOpCount(CodeGenInstruction &Inst, DagInit *Dag, +static bool verifyDagOpCount(const CodeGenInstruction &Inst, const DagInit *Dag, bool IsSource) { if (Dag->getNumArgs() == Inst.Operands.size()) return true; @@ -297,7 +300,7 @@ static bool verifyDagOpCount(CodeGenInstruction &Inst, DagInit *Dag, return true; } -static bool validateArgsTypes(Init *Arg1, Init *Arg2) { +static bool validateArgsTypes(const Init *Arg1, const Init *Arg2) { return cast<DefInit>(Arg1)->getDef() == cast<DefInit>(Arg2)->getDef(); } @@ -307,9 +310,9 @@ static bool validateArgsTypes(Init *Arg1, Init *Arg2) { // mapping $rs1 --> 0, $rs2 ---> 1. If the operand appears twice in the (tied) // same Dag we use the last occurrence for indexing. void CompressInstEmitter::createDagOperandMapping( - Record *Rec, StringMap<unsigned> &SourceOperands, - StringMap<unsigned> &DestOperands, DagInit *SourceDag, DagInit *DestDag, - IndexedMap<OpData> &SourceOperandMap) { + const Record *Rec, StringMap<unsigned> &SourceOperands, + StringMap<unsigned> &DestOperands, const DagInit *SourceDag, + const DagInit *DestDag, IndexedMap<OpData> &SourceOperandMap) { for (unsigned I = 0; I < DestDag->getNumArgs(); ++I) { // Skip fixed immediates and registers, they were handled in // addDagOperandMapping. @@ -354,9 +357,9 @@ void CompressInstEmitter::createDagOperandMapping( /// output instructions. Validate that operands defined in the input are /// used in the output pattern while populating the maps. 
void CompressInstEmitter::createInstOperandMapping( - Record *Rec, DagInit *SourceDag, DagInit *DestDag, + const Record *Rec, const DagInit *SourceDag, const DagInit *DestDag, IndexedMap<OpData> &SourceOperandMap, IndexedMap<OpData> &DestOperandMap, - StringMap<unsigned> &SourceOperands, CodeGenInstruction &DestInst) { + StringMap<unsigned> &SourceOperands, const CodeGenInstruction &DestInst) { // TiedCount keeps track of the number of operands skipped in Inst // operands list to get to the corresponding Dag operand. unsigned TiedCount = 0; @@ -423,14 +426,14 @@ void CompressInstEmitter::createInstOperandMapping( /// and generate warning. /// - Immediate operand type in Dag Input differs from the corresponding Source /// Instruction type and generate a warning. -void CompressInstEmitter::evaluateCompressPat(Record *Rec) { +void CompressInstEmitter::evaluateCompressPat(const Record *Rec) { // Validate input Dag operands. DagInit *SourceDag = Rec->getValueAsDag("Input"); assert(SourceDag && "Missing 'Input' in compress pattern!"); LLVM_DEBUG(dbgs() << "Input: " << *SourceDag << "\n"); // Checking we are transforming from compressed to uncompressed instructions. 
- Record *SourceOperator = SourceDag->getOperatorAsDef(Rec->getLoc()); + const Record *SourceOperator = SourceDag->getOperatorAsDef(Rec->getLoc()); CodeGenInstruction SourceInst(SourceOperator); verifyDagOpCount(SourceInst, SourceDag, true); @@ -439,7 +442,7 @@ void CompressInstEmitter::evaluateCompressPat(Record *Rec) { assert(DestDag && "Missing 'Output' in compress pattern!"); LLVM_DEBUG(dbgs() << "Output: " << *DestDag << "\n"); - Record *DestOperator = DestDag->getOperatorAsDef(Rec->getLoc()); + const Record *DestOperator = DestDag->getOperatorAsDef(Rec->getLoc()); CodeGenInstruction DestInst(DestOperator); verifyDagOpCount(DestInst, DestDag, false); @@ -475,9 +478,9 @@ void CompressInstEmitter::evaluateCompressPat(Record *Rec) { DestOperandMap, SourceOperands, DestInst); // Get the target features for the CompressPat. - std::vector<Record *> PatReqFeatures; - std::vector<Record *> RF = Rec->getValueAsListOfDefs("Predicates"); - copy_if(RF, std::back_inserter(PatReqFeatures), [](Record *R) { + std::vector<const Record *> PatReqFeatures; + std::vector<const Record *> RF = Rec->getValueAsListOfConstDefs("Predicates"); + copy_if(RF, std::back_inserter(PatReqFeatures), [](const Record *R) { return R->getValueAsBit("AssemblerMatcherPredicate"); }); @@ -489,8 +492,8 @@ void CompressInstEmitter::evaluateCompressPat(Record *Rec) { static void getReqFeatures(std::set<std::pair<bool, StringRef>> &FeaturesSet, std::set<std::set<std::pair<bool, StringRef>>> &AnyOfFeatureSets, - const std::vector<Record *> &ReqFeatures) { - for (auto &R : ReqFeatures) { + ArrayRef<const Record *> ReqFeatures) { + for (const Record *R : ReqFeatures) { const DagInit *D = R->getValueAsDag("AssemblerCondDag"); std::string CombineType = D->getOperator()->getAsString(); if (CombineType != "any_of" && CombineType != "all_of") @@ -542,8 +545,8 @@ static unsigned getPredicates(DenseMap<const Record *, unsigned> &PredicateMap, return 0; } -static void printPredicates(const std::vector<const Record 
*> &Predicates, - StringRef Name, raw_ostream &OS) { +static void printPredicates(ArrayRef<const Record *> Predicates, StringRef Name, + raw_ostream &OS) { for (unsigned I = 0; I < Predicates.size(); ++I) { StringRef Pred = Predicates[I]->getValueAsString(Name); OS << " case " << I + 1 << ": {\n" @@ -565,7 +568,7 @@ static void mergeCondAndCode(raw_ostream &CombinedStream, StringRef CondStr, void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &OS, EmitterType EType) { - Record *AsmWriter = Target.getAsmWriter(); + const Record *AsmWriter = Target.getAsmWriter(); if (!AsmWriter->getValueAsInt("PassSubtarget")) PrintFatalError(AsmWriter->getLoc(), "'PassSubtarget' is false. SubTargetInfo object is needed " @@ -683,9 +686,10 @@ void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &OS, getReqFeatures(FeaturesSet, AnyOfFeatureSets, CompressPat.PatReqFeatures); // Add Dest instruction required features. - std::vector<Record *> ReqFeatures; - std::vector<Record *> RF = Dest.TheDef->getValueAsListOfDefs("Predicates"); - copy_if(RF, std::back_inserter(ReqFeatures), [](Record *R) { + std::vector<const Record *> ReqFeatures; + std::vector<const Record *> RF = + Dest.TheDef->getValueAsListOfConstDefs("Predicates"); + copy_if(RF, std::back_inserter(ReqFeatures), [](const Record *R) { return R->getValueAsBit("AssemblerMatcherPredicate"); }); getReqFeatures(FeaturesSet, AnyOfFeatureSets, ReqFeatures); @@ -738,7 +742,7 @@ void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &OS, << ").getImm() == " << SourceOperandMap[OpNo].Data.Imm << ") &&\n"; break; case OpData::Reg: { - Record *Reg = SourceOperandMap[OpNo].Data.Reg; + const Record *Reg = SourceOperandMap[OpNo].Data.Reg; CondStream.indent(6) << "(MI.getOperand(" << OpNo << ").isReg()) &&\n" << " (MI.getOperand(" << OpNo << ").getReg() == " << TargetName @@ -827,7 +831,7 @@ void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &OS, case OpData::Reg: { if (CompressOrUncompress) { // Fixed 
register has been validated at pattern validation time. - Record *Reg = DestOperandMap[OpNo].Data.Reg; + const Record *Reg = DestOperandMap[OpNo].Data.Reg; CodeStream.indent(6) << "OutInst.addOperand(MCOperand::createReg(" << TargetName << "::" << Reg->getName() << "));\n"; @@ -891,11 +895,9 @@ void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &OS, } void CompressInstEmitter::run(raw_ostream &OS) { - std::vector<Record *> Insts = Records.getAllDerivedDefinitions("CompressPat"); - // Process the CompressPat definitions, validating them as we do so. - for (unsigned I = 0, E = Insts.size(); I != E; ++I) - evaluateCompressPat(Insts[I]); + for (const Record *Pat : Records.getAllDerivedDefinitions("CompressPat")) + evaluateCompressPat(Pat); // Emit file header. emitSourceFileHeader("Compress instruction Source Fragment", OS, Records); diff --git a/llvm/utils/TableGen/DAGISelEmitter.cpp b/llvm/utils/TableGen/DAGISelEmitter.cpp index 6c72103f6251..2cceb22afdb9 100644 --- a/llvm/utils/TableGen/DAGISelEmitter.cpp +++ b/llvm/utils/TableGen/DAGISelEmitter.cpp @@ -25,11 +25,11 @@ namespace { /// DAGISelEmitter - The top-level class which coordinates construction /// and emission of the instruction selector. class DAGISelEmitter { - RecordKeeper &Records; // Just so we can get at the timing functions. - CodeGenDAGPatterns CGP; + const RecordKeeper &Records; // Just so we can get at the timing functions. + const CodeGenDAGPatterns CGP; public: - explicit DAGISelEmitter(RecordKeeper &R) : Records(R), CGP(R) {} + explicit DAGISelEmitter(const RecordKeeper &R) : Records(R), CGP(R) {} void run(raw_ostream &OS); }; } // End anonymous namespace @@ -81,8 +81,8 @@ namespace { // In particular, we want to match maximal patterns first and lowest cost within // a particular complexity first. 
struct PatternSortingPredicate { - PatternSortingPredicate(CodeGenDAGPatterns &cgp) : CGP(cgp) {} - CodeGenDAGPatterns &CGP; + PatternSortingPredicate(const CodeGenDAGPatterns &cgp) : CGP(cgp) {} + const CodeGenDAGPatterns &CGP; bool operator()(const PatternToMatch *LHS, const PatternToMatch *RHS) { const TreePatternNode &LT = LHS->getSrcPattern(); diff --git a/llvm/utils/TableGen/DAGISelMatcherGen.cpp b/llvm/utils/TableGen/DAGISelMatcherGen.cpp index 5cb393ae7a53..e159cf1bbefd 100644 --- a/llvm/utils/TableGen/DAGISelMatcherGen.cpp +++ b/llvm/utils/TableGen/DAGISelMatcherGen.cpp @@ -23,7 +23,7 @@ using namespace llvm; /// getRegisterValueType - Look up and return the ValueType of the specified /// register. If the register is a member of multiple register classes, they /// must all have the same type. -static MVT::SimpleValueType getRegisterValueType(Record *R, +static MVT::SimpleValueType getRegisterValueType(const Record *R, const CodeGenTarget &T) { bool FoundRC = false; MVT::SimpleValueType VT = MVT::Other; @@ -91,7 +91,7 @@ class MatcherGen { /// PhysRegInputs - List list has an entry for each explicitly specified /// physreg input to the pattern. The first elt is the Register node, the /// second is the recorded slot number the input pattern match saved it in. - SmallVector<std::pair<Record *, unsigned>, 2> PhysRegInputs; + SmallVector<std::pair<const Record *, unsigned>, 2> PhysRegInputs; /// Matcher - This is the top level of the generated matcher, the result. Matcher *TheMatcher; @@ -220,13 +220,13 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode &N) { return; } - DefInit *DI = dyn_cast<DefInit>(N.getLeafValue()); + const DefInit *DI = dyn_cast<DefInit>(N.getLeafValue()); if (!DI) { errs() << "Unknown leaf kind: " << N << "\n"; abort(); } - Record *LeafRec = DI->getDef(); + const Record *LeafRec = DI->getDef(); // A ValueType leaf node can represent a register when named, or itself when // unnamed.
@@ -673,7 +673,7 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode &N, // If this is an explicit register reference, handle it. if (DefInit *DI = dyn_cast<DefInit>(N.getLeafValue())) { - Record *Def = DI->getDef(); + const Record *Def = DI->getDef(); if (Def->isSubClassOf("Register")) { const CodeGenRegister *Reg = CGP.getTargetInfo().getRegBank().getReg(Def); AddMatcher(new EmitRegisterMatcher(Reg, N.getSimpleType(0))); @@ -690,7 +690,7 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode &N, if (Def->getName() == "undef_tied_input") { MVT::SimpleValueType ResultVT = N.getSimpleType(0); auto IDOperandNo = NextRecordedOperandNo++; - Record *ImpDef = Def->getRecords().getDef("IMPLICIT_DEF"); + const Record *ImpDef = Def->getRecords().getDef("IMPLICIT_DEF"); CodeGenInstruction &II = CGP.getTargetInfo().getInstruction(ImpDef); AddMatcher(new EmitNodeMatcher(II, ResultVT, std::nullopt, false, false, false, false, -1, IDOperandNo)); @@ -907,11 +907,11 @@ void MatcherGen::EmitResultInstructionAsOperand( if (isRoot && !Pattern.getDstRegs().empty()) { // If the root came from an implicit def in the instruction handling stuff, // don't re-add it. - Record *HandledReg = nullptr; + const Record *HandledReg = nullptr; if (II.HasOneImplicitDefWithKnownVT(CGT) != MVT::Other) HandledReg = II.ImplicitDefs[0]; - for (Record *Reg : Pattern.getDstRegs()) { + for (const Record *Reg : Pattern.getDstRegs()) { if (!Reg->isSubClassOf("Register") || Reg == HandledReg) continue; ResultVTs.push_back(getRegisterValueType(Reg, CGT)); @@ -1042,7 +1042,7 @@ void MatcherGen::EmitResultCode() { if (!Pattern.getDstRegs().empty()) { // If the root came from an implicit def in the instruction handling stuff, // don't re-add it. 
- Record *HandledReg = nullptr; + const Record *HandledReg = nullptr; const TreePatternNode &DstPat = Pattern.getDstPattern(); if (!DstPat.isLeaf() && DstPat.getOperator()->isSubClassOf("Instruction")) { const CodeGenTarget &CGT = CGP.getTargetInfo(); @@ -1052,7 +1052,7 @@ void MatcherGen::EmitResultCode() { HandledReg = II.ImplicitDefs[0]; } - for (Record *Reg : Pattern.getDstRegs()) { + for (const Record *Reg : Pattern.getDstRegs()) { if (!Reg->isSubClassOf("Register") || Reg == HandledReg) continue; ++NumSrcResults; diff --git a/llvm/utils/TableGen/DFAEmitter.cpp b/llvm/utils/TableGen/DFAEmitter.cpp index 18620b2a073f..7d274a1cf632 100644 --- a/llvm/utils/TableGen/DFAEmitter.cpp +++ b/llvm/utils/TableGen/DFAEmitter.cpp @@ -170,7 +170,7 @@ void DfaEmitter::printActionValue(action_type A, raw_ostream &OS) { OS << A; } namespace { -using Action = std::variant<Record *, unsigned, std::string>; +using Action = std::variant<const Record *, unsigned, std::string>; using ActionTuple = std::vector<Action>; class Automaton; @@ -356,7 +356,7 @@ void CustomDfaEmitter::printActionValue(action_type A, raw_ostream &OS) { ListSeparator LS; for (const auto &SingleAction : AT) { OS << LS; - if (const auto *R = std::get_if<Record *>(&SingleAction)) + if (const auto *R = std::get_if<const Record *>(&SingleAction)) OS << (*R)->getName(); else if (const auto *S = std::get_if<std::string>(&SingleAction)) OS << '"' << *S << '"'; diff --git a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp index 55cb39c9de5f..42155e78d0a2 100644 --- a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp +++ b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp @@ -61,7 +61,7 @@ struct ScheduleClass { class DFAPacketizerEmitter { private: std::string TargetName; - RecordKeeper &Records; + const RecordKeeper &Records; UniqueVector<ResourceVector> UniqueResources; std::vector<ScheduleClass> ScheduleClasses; @@ -69,17 +69,18 @@ private: std::map<unsigned, uint64_t> 
ComboBitToBitsMap; public: - DFAPacketizerEmitter(RecordKeeper &R); + DFAPacketizerEmitter(const RecordKeeper &R); // Construct a map of function unit names to bits. int collectAllFuncUnits(ArrayRef<const CodeGenProcModel *> ProcModels); // Construct a map from a combo function unit bit to the bits of all included // functional units. - int collectAllComboFuncs(ArrayRef<Record *> ComboFuncList); + int collectAllComboFuncs(ArrayRef<const Record *> ComboFuncList); - ResourceVector getResourcesForItinerary(Record *Itinerary); - void createScheduleClasses(unsigned ItineraryIdx, const RecVec &Itineraries); + ResourceVector getResourcesForItinerary(const Record *Itinerary); + void createScheduleClasses(unsigned ItineraryIdx, + ArrayRef<const Record *> Itineraries); // Emit code for a subset of itineraries. void emitForItineraries(raw_ostream &OS, @@ -90,7 +91,7 @@ public: }; } // end anonymous namespace -DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R) +DFAPacketizerEmitter::DFAPacketizerEmitter(const RecordKeeper &R) : TargetName(std::string(CodeGenTarget(R).getName())), Records(R) {} int DFAPacketizerEmitter::collectAllFuncUnits( @@ -107,7 +108,7 @@ int DFAPacketizerEmitter::collectAllFuncUnits( int totalFUs = 0; // Parse functional units for all the itineraries. 
for (const Record *Proc : ProcItinList) { - std::vector<Record *> FUs = Proc->getValueAsListOfDefs("FU"); + std::vector<const Record *> FUs = Proc->getValueAsListOfConstDefs("FU"); LLVM_DEBUG(dbgs() << " FU:" << " (" << FUs.size() << " FUs) " << Proc->getName()); @@ -129,7 +130,7 @@ int DFAPacketizerEmitter::collectAllFuncUnits( } int DFAPacketizerEmitter::collectAllComboFuncs( - ArrayRef<Record *> ComboFuncList) { + ArrayRef<const Record *> ComboFuncList) { LLVM_DEBUG(dbgs() << "-------------------------------------------------------" "----------------------\n"); LLVM_DEBUG(dbgs() << "collectAllComboFuncs"); @@ -137,8 +138,8 @@ int DFAPacketizerEmitter::collectAllComboFuncs( int numCombos = 0; for (unsigned i = 0, N = ComboFuncList.size(); i < N; ++i) { - Record *Func = ComboFuncList[i]; - std::vector<Record *> FUs = Func->getValueAsListOfDefs("CFD"); + const Record *Func = ComboFuncList[i]; + std::vector<const Record *> FUs = Func->getValueAsListOfConstDefs("CFD"); LLVM_DEBUG(dbgs() << " CFD:" << i << " (" << FUs.size() << " combo FUs) " << Func->getName() << "\n"); @@ -147,16 +148,16 @@ int DFAPacketizerEmitter::collectAllComboFuncs( for (unsigned j = 0, N = FUs.size(); j < N; ++j) { assert((j < DFA_MAX_RESOURCES) && "Exceeded maximum number of DFA resources"); - Record *FuncData = FUs[j]; - Record *ComboFunc = FuncData->getValueAsDef("TheComboFunc"); - const std::vector<Record *> &FuncList = - FuncData->getValueAsListOfDefs("FuncList"); + const Record *FuncData = FUs[j]; + const Record *ComboFunc = FuncData->getValueAsDef("TheComboFunc"); + const std::vector<const Record *> FuncList = + FuncData->getValueAsListOfConstDefs("FuncList"); const std::string &ComboFuncName = std::string(ComboFunc->getName()); uint64_t ComboBit = FUNameToBitsMap[ComboFuncName]; uint64_t ComboResources = ComboBit; LLVM_DEBUG(dbgs() << " combo: " << ComboFuncName << ":0x" << Twine::utohexstr(ComboResources) << "\n"); - for (auto *K : FuncList) { + for (const Record *K : FuncList) { 
std::string FuncName = std::string(K->getName()); uint64_t FuncResources = FUNameToBitsMap[FuncName]; LLVM_DEBUG(dbgs() << " " << FuncName << ":0x" @@ -174,12 +175,12 @@ int DFAPacketizerEmitter::collectAllComboFuncs( } ResourceVector -DFAPacketizerEmitter::getResourcesForItinerary(Record *Itinerary) { +DFAPacketizerEmitter::getResourcesForItinerary(const Record *Itinerary) { ResourceVector Resources; assert(Itinerary); - for (Record *StageDef : Itinerary->getValueAsListOfDefs("Stages")) { + for (const Record *StageDef : Itinerary->getValueAsListOfDefs("Stages")) { uint64_t StageResources = 0; - for (Record *Unit : StageDef->getValueAsListOfDefs("Units")) { + for (const Record *Unit : StageDef->getValueAsListOfDefs("Units")) { StageResources |= FUNameToBitsMap[std::string(Unit->getName())]; } if (StageResources != 0) @@ -188,10 +189,10 @@ DFAPacketizerEmitter::getResourcesForItinerary(Record *Itinerary) { return Resources; } -void DFAPacketizerEmitter::createScheduleClasses(unsigned ItineraryIdx, - const RecVec &Itineraries) { +void DFAPacketizerEmitter::createScheduleClasses( + unsigned ItineraryIdx, ArrayRef<const Record *> Itineraries) { unsigned Idx = 0; - for (Record *Itinerary : Itineraries) { + for (const Record *Itinerary : Itineraries) { if (!Itinerary) { ScheduleClasses.push_back({ItineraryIdx, Idx++, 0, ResourceVector{}}); continue; diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp index 20164e1368ee..a4b549509286 100644 --- a/llvm/utils/TableGen/DXILEmitter.cpp +++ b/llvm/utils/TableGen/DXILEmitter.cpp @@ -325,8 +325,7 @@ static std::string getAttributeMaskString(const SmallVector<Record *> Recs) { } /// Emit a mapping of DXIL opcode to opname -static void emitDXILOpCodes(std::vector<DXILOperationDesc> &Ops, - raw_ostream &OS) { +static void emitDXILOpCodes(ArrayRef<DXILOperationDesc> Ops, raw_ostream &OS) { OS << "#ifdef DXIL_OPCODE\n"; for (const DXILOperationDesc &Op : Ops) OS << "DXIL_OPCODE(" << Op.OpCode << ", 
" << Op.OpName << ")\n"; @@ -336,23 +335,20 @@ static void emitDXILOpCodes(std::vector<DXILOperationDesc> &Ops, } /// Emit a list of DXIL op classes -static void emitDXILOpClasses(RecordKeeper &Records, raw_ostream &OS) { +static void emitDXILOpClasses(const RecordKeeper &Records, raw_ostream &OS) { OS << "#ifdef DXIL_OPCLASS\n"; - std::vector<Record *> OpClasses = - Records.getAllDerivedDefinitions("DXILOpClass"); - for (Record *OpClass : OpClasses) + for (const Record *OpClass : Records.getAllDerivedDefinitions("DXILOpClass")) OS << "DXIL_OPCLASS(" << OpClass->getName() << ")\n"; OS << "#undef DXIL_OPCLASS\n"; OS << "#endif\n\n"; } /// Emit a list of DXIL op parameter types -static void emitDXILOpParamTypes(RecordKeeper &Records, raw_ostream &OS) { +static void emitDXILOpParamTypes(const RecordKeeper &Records, raw_ostream &OS) { OS << "#ifdef DXIL_OP_PARAM_TYPE\n"; - std::vector<Record *> OpClasses = - Records.getAllDerivedDefinitions("DXILOpParamType"); - for (Record *OpClass : OpClasses) - OS << "DXIL_OP_PARAM_TYPE(" << OpClass->getName() << ")\n"; + for (const Record *OpParamType : + Records.getAllDerivedDefinitions("DXILOpParamType")) + OS << "DXIL_OP_PARAM_TYPE(" << OpParamType->getName() << ")\n"; OS << "#undef DXIL_OP_PARAM_TYPE\n"; OS << "#endif\n\n"; } @@ -378,7 +374,7 @@ static void emitDXILOpFunctionTypes(ArrayRef<DXILOperationDesc> Ops, /// Emit map of DXIL operation to LLVM or DirectX intrinsic /// \param A vector of DXIL Ops /// \param Output stream -static void emitDXILIntrinsicMap(std::vector<DXILOperationDesc> &Ops, +static void emitDXILIntrinsicMap(ArrayRef<DXILOperationDesc> Ops, raw_ostream &OS) { OS << "#ifdef DXIL_OP_INTRINSIC\n"; OS << "\n"; @@ -396,14 +392,14 @@ static void emitDXILIntrinsicMap(std::vector<DXILOperationDesc> &Ops, /// Emit DXIL operation table /// \param A vector of DXIL Ops /// \param Output stream -static void emitDXILOperationTable(std::vector<DXILOperationDesc> &Ops, +static void 
emitDXILOperationTable(ArrayRef<DXILOperationDesc> Ops, raw_ostream &OS) { // Collect Names. SequenceToOffsetTable<std::string> OpClassStrings; SequenceToOffsetTable<std::string> OpStrings; StringSet<> ClassSet; - for (auto &Op : Ops) { + for (const auto &Op : Ops) { OpStrings.add(Op.OpName); if (ClassSet.insert(Op.OpClass).second) @@ -421,7 +417,7 @@ static void emitDXILOperationTable(std::vector<DXILOperationDesc> &Ops, OS << " static const OpCodeProperty OpCodeProps[] = {\n"; std::string Prefix = ""; - for (auto &Op : Ops) { + for (const auto &Op : Ops) { OS << Prefix << " { dxil::OpCode::" << Op.OpName << ", " << OpStrings.get(Op.OpName) << ", OpCodeClass::" << Op.OpClass << ", " << OpClassStrings.get(Op.OpClass.data()) << ", " @@ -469,14 +465,15 @@ static void emitDXILOperationTable(std::vector<DXILOperationDesc> &Ops, OS << "}\n\n"; } -static void emitDXILOperationTableDataStructs(RecordKeeper &Records, +static void emitDXILOperationTableDataStructs(const RecordKeeper &Records, raw_ostream &OS) { // Get Shader stage records - std::vector<Record *> ShaderKindRecs = + std::vector<const Record *> ShaderKindRecs = Records.getAllDerivedDefinitions("DXILShaderStage"); // Sort records by name - llvm::sort(ShaderKindRecs, - [](Record *A, Record *B) { return A->getName() < B->getName(); }); + llvm::sort(ShaderKindRecs, [](const Record *A, const Record *B) { + return A->getName() < B->getName(); + }); OS << "// Valid shader kinds\n\n"; // Choose the type of enum ShaderKind based on the number of stages declared. 
@@ -508,22 +505,21 @@ static void emitDXILOperationTableDataStructs(RecordKeeper &Records, /// Entry function call that invokes the functionality of this TableGen backend /// \param Records TableGen records of DXIL Operations defined in DXIL.td /// \param OS output stream -static void EmitDXILOperation(RecordKeeper &Records, raw_ostream &OS) { +static void EmitDXILOperation(const RecordKeeper &Records, raw_ostream &OS) { OS << "// Generated code, do not edit.\n"; OS << "\n"; // Get all DXIL Ops property records - std::vector<Record *> OpIntrProps = - Records.getAllDerivedDefinitions("DXILOp"); std::vector<DXILOperationDesc> DXILOps; - for (auto *Record : OpIntrProps) { - DXILOps.emplace_back(DXILOperationDesc(Record)); + for (const Record *R : Records.getAllDerivedDefinitions("DXILOp")) { + DXILOps.emplace_back(DXILOperationDesc(R)); } // Sort by opcode. - llvm::sort(DXILOps, [](DXILOperationDesc &A, DXILOperationDesc &B) { - return A.OpCode < B.OpCode; - }); + llvm::sort(DXILOps, + [](const DXILOperationDesc &A, const DXILOperationDesc &B) { + return A.OpCode < B.OpCode; + }); int PrevOp = -1; - for (DXILOperationDesc &Desc : DXILOps) { + for (const DXILOperationDesc &Desc : DXILOps) { if (Desc.OpCode == PrevOp) PrintFatalError(Twine("Duplicate opcode: ") + Twine(Desc.OpCode)); PrevOp = Desc.OpCode; diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp index b5da37b51346..edecb9067bcc 100644 --- a/llvm/utils/TableGen/DecoderEmitter.cpp +++ b/llvm/utils/TableGen/DecoderEmitter.cpp @@ -155,12 +155,12 @@ raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) { } class DecoderEmitter { - RecordKeeper &RK; + const RecordKeeper &RK; std::vector<EncodingAndInst> NumberedEncodings; public: - DecoderEmitter(RecordKeeper &R, std::string PredicateNamespace) - : RK(R), Target(R), PredicateNamespace(std::move(PredicateNamespace)) {} + DecoderEmitter(const RecordKeeper &R, const std::string &PredicateNamespace) + : RK(R), 
Target(R), PredicateNamespace(PredicateNamespace) {} // Emit the decoder state machine table. void emitTable(formatted_raw_ostream &o, DecoderTable &Table, @@ -181,7 +181,7 @@ private: CodeGenTarget Target; public: - std::string PredicateNamespace; + const std::string &PredicateNamespace; }; } // end anonymous namespace @@ -1302,7 +1302,7 @@ bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation, AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates"); bool IsFirstEmission = true; for (unsigned i = 0; i < Predicates->size(); ++i) { - Record *Pred = Predicates->getElementAsRecord(i); + const Record *Pred = Predicates->getElementAsRecord(i); if (!Pred->getValue("AssemblerMatcherPredicate")) continue; @@ -1320,10 +1320,10 @@ bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation, } bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const { - ListInit *Predicates = + const ListInit *Predicates = AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates"); for (unsigned i = 0; i < Predicates->size(); ++i) { - Record *Pred = Predicates->getElementAsRecord(i); + const Record *Pred = Predicates->getElementAsRecord(i); if (!Pred->getValue("AssemblerMatcherPredicate")) continue; @@ -1868,7 +1868,7 @@ static std::string findOperandDecoderMethod(const Record *Record) { std::string Decoder; const RecordVal *DecoderString = Record->getValue("DecoderMethod"); - StringInit *String = + const StringInit *String = DecoderString ? 
dyn_cast<StringInit>(DecoderString->getValue()) : nullptr; if (String) { Decoder = std::string(String->getValue()); @@ -2010,7 +2010,7 @@ static void addOneOperandFields(const Record &EncodingDef, const BitsInit &Bits, } static unsigned -populateInstruction(CodeGenTarget &Target, const Record &EncodingDef, +populateInstruction(const CodeGenTarget &Target, const Record &EncodingDef, const CodeGenInstruction &CGI, unsigned Opc, std::map<unsigned, std::vector<OperandInfo>> &Operands, bool IsVarLenInst) { @@ -2089,12 +2089,12 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef, DagInit *SubArgDag = dyn_cast<DagInit>(OpInit); if (SubArgDag) OpInit = SubArgDag->getOperator(); - Record *OpTypeRec = cast<DefInit>(OpInit)->getDef(); + const Record *OpTypeRec = cast<DefInit>(OpInit)->getDef(); // Lookup the sub-operands from the operand type record (note that only // Operand subclasses have MIOperandInfo, see CodeGenInstruction.cpp). - DagInit *SubOps = OpTypeRec->isSubClassOf("Operand") - ? OpTypeRec->getValueAsDag("MIOperandInfo") - : nullptr; + const DagInit *SubOps = OpTypeRec->isSubClassOf("Operand") + ? OpTypeRec->getValueAsDag("MIOperandInfo") + : nullptr; // Lookup the decoder method and construct a new OperandInfo to hold our // result. 
@@ -2549,7 +2549,7 @@ namespace llvm { handleHwModesUnrelatedEncodings(NumberedInstruction, HwModeNames, NamespacesWithHwModes, NumberedEncodings); } - for (const auto &NumberedAlias : + for (const Record *NumberedAlias : RK.getAllDerivedDefinitions("AdditionalEncoding")) NumberedEncodings.emplace_back( NumberedAlias, diff --git a/llvm/utils/TableGen/FastISelEmitter.cpp b/llvm/utils/TableGen/FastISelEmitter.cpp index 01df873ece1f..af05496a7b6a 100644 --- a/llvm/utils/TableGen/FastISelEmitter.cpp +++ b/llvm/utils/TableGen/FastISelEmitter.cpp @@ -272,7 +272,7 @@ struct OperandsSignature { DefInit *OpDI = dyn_cast<DefInit>(Op.getLeafValue()); if (!OpDI) return false; - Record *OpLeafRec = OpDI->getDef(); + const Record *OpLeafRec = OpDI->getDef(); // For now, the only other thing we accept is register operands. const CodeGenRegisterClass *RC = nullptr; @@ -407,7 +407,7 @@ class FastISelMap { public: explicit FastISelMap(StringRef InstNS); - void collectPatterns(CodeGenDAGPatterns &CGP); + void collectPatterns(const CodeGenDAGPatterns &CGP); void printImmediatePredicates(raw_ostream &OS); void printFunctionDefinitions(raw_ostream &OS); @@ -417,7 +417,8 @@ private: }; } // End anonymous namespace -static std::string getOpcodeName(const Record *Op, CodeGenDAGPatterns &CGP) { +static std::string getOpcodeName(const Record *Op, + const CodeGenDAGPatterns &CGP) { return std::string(CGP.getSDNodeInfo(Op).getEnumName()); } @@ -437,7 +438,7 @@ static std::string PhyRegForNode(TreePatternNode &Op, if (!Op.isLeaf()) return PhysReg; - Record *OpLeafRec = cast<DefInit>(Op.getLeafValue())->getDef(); + const Record *OpLeafRec = cast<DefInit>(Op.getLeafValue())->getDef(); if (!OpLeafRec->isSubClassOf("Register")) return PhysReg; @@ -448,7 +449,7 @@ static std::string PhyRegForNode(TreePatternNode &Op, return PhysReg; } -void FastISelMap::collectPatterns(CodeGenDAGPatterns &CGP) { +void FastISelMap::collectPatterns(const CodeGenDAGPatterns &CGP) { const CodeGenTarget &Target = 
CGP.getTargetInfo(); // Scan through all the patterns and record the simple ones. @@ -864,8 +865,8 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) { // TODO: SignaturesWithConstantForms should be empty here. } -static void EmitFastISel(RecordKeeper &RK, raw_ostream &OS) { - CodeGenDAGPatterns CGP(RK); +static void EmitFastISel(const RecordKeeper &RK, raw_ostream &OS) { + const CodeGenDAGPatterns CGP(RK); const CodeGenTarget &Target = CGP.getTargetInfo(); emitSourceFileHeader("\"Fast\" Instruction Selector for the " + Target.getName().str() + " target", diff --git a/llvm/utils/TableGen/InstrDocsEmitter.cpp b/llvm/utils/TableGen/InstrDocsEmitter.cpp index f53428ecdffe..d32cfa235454 100644 --- a/llvm/utils/TableGen/InstrDocsEmitter.cpp +++ b/llvm/utils/TableGen/InstrDocsEmitter.cpp @@ -61,9 +61,9 @@ static std::string escapeForRST(StringRef Str) { return Result; } -static void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) { - CodeGenDAGPatterns CDP(RK); - CodeGenTarget &Target = CDP.getTargetInfo(); +static void EmitInstrDocs(const RecordKeeper &RK, raw_ostream &OS) { + const CodeGenDAGPatterns CDP(RK); + const CodeGenTarget &Target = CDP.getTargetInfo(); unsigned VariantCount = Target.getAsmParserVariantCount(); // Page title. @@ -86,7 +86,7 @@ static void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) { // Assembly string(s). if (!II->AsmString.empty()) { for (unsigned VarNum = 0; VarNum < VariantCount; ++VarNum) { - Record *AsmVariant = Target.getAsmParserVariant(VarNum); + const Record *AsmVariant = Target.getAsmParserVariant(VarNum); OS << "Assembly string"; if (VariantCount != 1) OS << " (" << AsmVariant->getValueAsString("Name") << ")"; @@ -167,7 +167,7 @@ static void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) { // names of both the compound operand and the basic operands it // contains. 
for (unsigned SubOpIdx = 0; SubOpIdx < Op.MINumOperands; ++SubOpIdx) { - Record *SubRec = + const Record *SubRec = cast<DefInit>(Op.MIOperandInfo->getArg(SubOpIdx))->getDef(); StringRef SubOpName = Op.MIOperandInfo->getArgNameStr(SubOpIdx); StringRef SubOpTypeName = SubRec->getName(); @@ -198,7 +198,7 @@ static void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) { if (!II->ImplicitDefs.empty()) { OS << "Implicit defs: "; ListSeparator LS; - for (Record *Def : II->ImplicitDefs) + for (const Record *Def : II->ImplicitDefs) OS << LS << "``" << Def->getName() << "``"; OS << "\n\n"; } @@ -207,18 +207,18 @@ static void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) { if (!II->ImplicitUses.empty()) { OS << "Implicit uses: "; ListSeparator LS; - for (Record *Use : II->ImplicitUses) + for (const Record *Use : II->ImplicitUses) OS << LS << "``" << Use->getName() << "``"; OS << "\n\n"; } // Predicates. - std::vector<Record *> Predicates = - II->TheDef->getValueAsListOfDefs("Predicates"); + std::vector<const Record *> Predicates = + II->TheDef->getValueAsListOfConstDefs("Predicates"); if (!Predicates.empty()) { OS << "Predicates: "; ListSeparator LS; - for (Record *P : Predicates) + for (const Record *P : Predicates) OS << LS << "``" << P->getName() << "``"; OS << "\n\n"; } diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp index 4e2138d15fde..5830cdae7096 100644 --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp @@ -672,7 +672,8 @@ void InstrInfoEmitter::emitLogicalOperandTypeMappings( void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS, StringRef TargetName) { - RecVec TIIPredicates = Records.getAllDerivedDefinitions("TIIPredicate"); + ArrayRef<const Record *> TIIPredicates = + Records.getAllDerivedDefinitions("TIIPredicate"); OS << "#ifdef GET_INSTRINFO_MC_HELPER_DECLS\n"; OS << "#undef GET_INSTRINFO_MC_HELPER_DECLS\n\n"; diff --git 
a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp index f61a05861981..c4f238b67476 100644 --- a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp +++ b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp @@ -52,36 +52,37 @@ using namespace llvm; namespace { class MacroFusionPredicatorEmitter { - RecordKeeper &Records; - CodeGenTarget Target; + const RecordKeeper &Records; + const CodeGenTarget Target; - void emitMacroFusionDecl(ArrayRef<Record *> Fusions, PredicateExpander &PE, - raw_ostream &OS); - void emitMacroFusionImpl(ArrayRef<Record *> Fusions, PredicateExpander &PE, - raw_ostream &OS); - void emitPredicates(ArrayRef<Record *> FirstPredicate, bool IsCommutable, - PredicateExpander &PE, raw_ostream &OS); - void emitFirstPredicate(Record *SecondPredicate, bool IsCommutable, + void emitMacroFusionDecl(ArrayRef<const Record *> Fusions, + PredicateExpander &PE, raw_ostream &OS); + void emitMacroFusionImpl(ArrayRef<const Record *> Fusions, + PredicateExpander &PE, raw_ostream &OS); + void emitPredicates(ArrayRef<const Record *> FirstPredicate, + bool IsCommutable, PredicateExpander &PE, + raw_ostream &OS); + void emitFirstPredicate(const Record *SecondPredicate, bool IsCommutable, PredicateExpander &PE, raw_ostream &OS); - void emitSecondPredicate(Record *SecondPredicate, bool IsCommutable, + void emitSecondPredicate(const Record *SecondPredicate, bool IsCommutable, PredicateExpander &PE, raw_ostream &OS); - void emitBothPredicate(Record *Predicates, bool IsCommutable, + void emitBothPredicate(const Record *Predicates, bool IsCommutable, PredicateExpander &PE, raw_ostream &OS); public: - MacroFusionPredicatorEmitter(RecordKeeper &R) : Records(R), Target(R) {} + MacroFusionPredicatorEmitter(const RecordKeeper &R) : Records(R), Target(R) {} void run(raw_ostream &OS); }; } // End anonymous namespace. 
void MacroFusionPredicatorEmitter::emitMacroFusionDecl( - ArrayRef<Record *> Fusions, PredicateExpander &PE, raw_ostream &OS) { + ArrayRef<const Record *> Fusions, PredicateExpander &PE, raw_ostream &OS) { OS << "#ifdef GET_" << Target.getName() << "_MACRO_FUSION_PRED_DECL\n"; OS << "#undef GET_" << Target.getName() << "_MACRO_FUSION_PRED_DECL\n\n"; OS << "namespace llvm {\n"; - for (Record *Fusion : Fusions) { + for (const Record *Fusion : Fusions) { OS << "bool is" << Fusion->getName() << "(const TargetInstrInfo &, " << "const TargetSubtargetInfo &, " << "const MachineInstr *, " @@ -93,14 +94,14 @@ void MacroFusionPredicatorEmitter::emitMacroFusionDecl( } void MacroFusionPredicatorEmitter::emitMacroFusionImpl( - ArrayRef<Record *> Fusions, PredicateExpander &PE, raw_ostream &OS) { + ArrayRef<const Record *> Fusions, PredicateExpander &PE, raw_ostream &OS) { OS << "#ifdef GET_" << Target.getName() << "_MACRO_FUSION_PRED_IMPL\n"; OS << "#undef GET_" << Target.getName() << "_MACRO_FUSION_PRED_IMPL\n\n"; OS << "namespace llvm {\n"; - for (Record *Fusion : Fusions) { - std::vector<Record *> Predicates = - Fusion->getValueAsListOfDefs("Predicates"); + for (const Record *Fusion : Fusions) { + std::vector<const Record *> Predicates = + Fusion->getValueAsListOfConstDefs("Predicates"); bool IsCommutable = Fusion->getValueAsBit("IsCommutable"); OS << "bool is" << Fusion->getName() << "(\n"; @@ -121,12 +122,11 @@ void MacroFusionPredicatorEmitter::emitMacroFusionImpl( OS << "\n#endif\n"; } -void MacroFusionPredicatorEmitter::emitPredicates(ArrayRef<Record *> Predicates, - bool IsCommutable, - PredicateExpander &PE, - raw_ostream &OS) { - for (Record *Predicate : Predicates) { - Record *Target = Predicate->getValueAsDef("Target"); +void MacroFusionPredicatorEmitter::emitPredicates( + ArrayRef<const Record *> Predicates, bool IsCommutable, + PredicateExpander &PE, raw_ostream &OS) { + for (const Record *Predicate : Predicates) { + const Record *Target = 
Predicate->getValueAsDef("Target"); if (Target->getName() == "first_fusion_target") emitFirstPredicate(Predicate, IsCommutable, PE, OS); else if (Target->getName() == "second_fusion_target") @@ -139,7 +139,7 @@ void MacroFusionPredicatorEmitter::emitPredicates(ArrayRef<Record *> Predicates, } } -void MacroFusionPredicatorEmitter::emitFirstPredicate(Record *Predicate, +void MacroFusionPredicatorEmitter::emitFirstPredicate(const Record *Predicate, bool IsCommutable, PredicateExpander &PE, raw_ostream &OS) { @@ -172,7 +172,7 @@ void MacroFusionPredicatorEmitter::emitFirstPredicate(Record *Predicate, } } -void MacroFusionPredicatorEmitter::emitSecondPredicate(Record *Predicate, +void MacroFusionPredicatorEmitter::emitSecondPredicate(const Record *Predicate, bool IsCommutable, PredicateExpander &PE, raw_ostream &OS) { @@ -223,7 +223,7 @@ void MacroFusionPredicatorEmitter::emitSecondPredicate(Record *Predicate, } } -void MacroFusionPredicatorEmitter::emitBothPredicate(Record *Predicate, +void MacroFusionPredicatorEmitter::emitBothPredicate(const Record *Predicate, bool IsCommutable, PredicateExpander &PE, raw_ostream &OS) { @@ -277,9 +277,7 @@ void MacroFusionPredicatorEmitter::run(raw_ostream &OS) { PE.setByRef(false); PE.setExpandForMC(false); - std::vector<Record *> Fusions = Records.getAllDerivedDefinitions("Fusion"); - // Sort macro fusions by name. 
- sort(Fusions, LessRecord()); + ArrayRef<const Record *> Fusions = Records.getAllDerivedDefinitions("Fusion"); emitMacroFusionDecl(Fusions, PE, OS); OS << "\n"; emitMacroFusionImpl(Fusions, PE, OS); diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp index e076832674bd..63e70698d7cd 100644 --- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp +++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp @@ -123,7 +123,7 @@ void RegisterInfoEmitter::runEnums(raw_ostream &OS, CodeGenTarget &Target, if (!Namespace.empty()) OS << "namespace " << Namespace << " {\n"; - OS << "enum {\n NoRegister,\n"; + OS << "enum : unsigned {\n NoRegister,\n"; for (const auto &Reg : Registers) OS << " " << Reg.getName() << " = " << Reg.EnumValue << ",\n"; diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp index 394e2eb42c15..c568f6747f4f 100644 --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -284,7 +284,7 @@ unsigned SubtargetEmitter::FeatureKeyValues(raw_ostream &OS, << "\"" << CommandLineName << "\", " << "\"" << Desc << "\", " << Target << "::" << Name << ", "; - RecVec ImpliesList = Feature->getValueAsListOfDefs("Implies"); + ConstRecVec ImpliesList = Feature->getValueAsListOfConstDefs("Implies"); printFeatureMask(OS, ImpliesList, FeatureMap); @@ -320,8 +320,9 @@ unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS, for (const Record *Processor : ProcessorList) { StringRef Name = Processor->getValueAsString("Name"); - RecVec FeatureList = Processor->getValueAsListOfDefs("Features"); - RecVec TuneFeatureList = Processor->getValueAsListOfDefs("TuneFeatures"); + ConstRecVec FeatureList = Processor->getValueAsListOfConstDefs("Features"); + ConstRecVec TuneFeatureList = + Processor->getValueAsListOfConstDefs("TuneFeatures"); // Emit as "{ "cpu", "description", 0, { f1 , f2 , ... fn } },". 
OS << " { " @@ -366,7 +367,7 @@ void SubtargetEmitter::FormItineraryStageString(const std::string &Name, ItinString += " { " + itostr(Cycles) + ", "; // Get unit list - RecVec UnitList = Stage->getValueAsListOfDefs("Units"); + ConstRecVec UnitList = Stage->getValueAsListOfConstDefs("Units"); // For each unit for (unsigned j = 0, M = UnitList.size(); j < M;) { @@ -444,7 +445,7 @@ void SubtargetEmitter::EmitStageAndOperandCycleData( if (!ItinsDefSet.insert(ProcModel.ItinsDef).second) continue; - RecVec FUs = ProcModel.ItinsDef->getValueAsListOfDefs("FU"); + ConstRecVec FUs = ProcModel.ItinsDef->getValueAsListOfConstDefs("FU"); if (FUs.empty()) continue; @@ -458,7 +459,7 @@ void SubtargetEmitter::EmitStageAndOperandCycleData( OS << "} // end namespace " << Name << "FU\n"; - RecVec BPs = ProcModel.ItinsDef->getValueAsListOfDefs("BP"); + ConstRecVec BPs = ProcModel.ItinsDef->getValueAsListOfConstDefs("BP"); if (!BPs.empty()) { OS << "\n// Pipeline forwarding paths for itineraries \"" << Name << "\"\n" @@ -682,8 +683,7 @@ void SubtargetEmitter::EmitProcessorResourceSubUnits( const Record *PRDef = ProcModel.ProcResourceDefs[i]; if (!PRDef->isSubClassOf("ProcResGroup")) continue; - RecVec ResUnits = PRDef->getValueAsListOfDefs("Resources"); - for (const Record *RUDef : ResUnits) { + for (const Record *RUDef : PRDef->getValueAsListOfDefs("Resources")) { const Record *RU = SchedModels.findProcResUnits(RUDef, ProcModel, PRDef->getLoc()); for (unsigned J = 0; J < RU->getValueAsInt("NumUnits"); ++J) { @@ -842,8 +842,7 @@ void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel, const unsigned SubUnitsBeginOffset = SubUnitsOffset; int BufferSize = PRDef->getValueAsInt("BufferSize"); if (PRDef->isSubClassOf("ProcResGroup")) { - RecVec ResUnits = PRDef->getValueAsListOfDefs("Resources"); - for (const Record *RU : ResUnits) { + for (const Record *RU : PRDef->getValueAsListOfDefs("Resources")) { NumUnits += RU->getValueAsInt("NumUnits"); SubUnitsOffset += 
RU->getValueAsInt("NumUnits"); } @@ -1028,7 +1027,7 @@ void SubtargetEmitter::ExpandProcResources( for (const Record *PR : PM.ProcResourceDefs) { if (PR == PRDef || !PR->isSubClassOf("ProcResGroup")) continue; - RecVec SuperResources = PR->getValueAsListOfDefs("Resources"); + ConstRecVec SuperResources = PR->getValueAsListOfConstDefs("Resources"); ConstRecIter SubI = SubResources.begin(), SubE = SubResources.end(); for (; SubI != SubE; ++SubI) { if (!is_contained(SuperResources, *SubI)) { @@ -1105,16 +1104,18 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel, if (RWDef) { Writes.clear(); Reads.clear(); - SchedModels.findRWs(RWDef->getValueAsListOfDefs("OperandReadWrites"), - Writes, Reads); + SchedModels.findRWs( + RWDef->getValueAsListOfConstDefs("OperandReadWrites"), Writes, + Reads); } } if (Writes.empty()) { // Check this processor's itinerary class resources. for (const Record *I : ProcModel.ItinRWDefs) { - RecVec Matched = I->getValueAsListOfDefs("MatchedItinClasses"); + ConstRecVec Matched = + I->getValueAsListOfConstDefs("MatchedItinClasses"); if (is_contained(Matched, SC.ItinClassDef)) { - SchedModels.findRWs(I->getValueAsListOfDefs("OperandReadWrites"), + SchedModels.findRWs(I->getValueAsListOfConstDefs("OperandReadWrites"), Writes, Reads); break; } @@ -1274,7 +1275,8 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel, SCDesc.NumMicroOps = MCSchedClassDesc::InvalidNumMicroOps; break; } - RecVec ValidWrites = ReadAdvance->getValueAsListOfDefs("ValidWrites"); + ConstRecVec ValidWrites = + ReadAdvance->getValueAsListOfConstDefs("ValidWrites"); IdxVec WriteIDs; if (ValidWrites.empty()) WriteIDs.push_back(0); diff --git a/llvm/utils/TableGen/TableGenBackends.h b/llvm/utils/TableGen/TableGenBackends.h index e0d12abaaa03..fc3b87370766 100644 --- a/llvm/utils/TableGen/TableGenBackends.h +++ b/llvm/utils/TableGen/TableGenBackends.h @@ -61,7 +61,7 @@ namespace llvm { class raw_ostream; class RecordKeeper; 
-void EmitMapTable(RecordKeeper &RK, raw_ostream &OS); +void EmitMapTable(const RecordKeeper &RK, raw_ostream &OS); // Defined in DecoderEmitter.cpp void EmitDecoder(RecordKeeper &RK, raw_ostream &OS, diff --git a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp index e9436ab16e44..7373494e8b12 100644 --- a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp +++ b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp @@ -19,28 +19,23 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Record.h" -namespace llvm { - static constexpr int WebAssemblyInstructionTableSize = 256; -void emitWebAssemblyDisassemblerTables( +void llvm::emitWebAssemblyDisassemblerTables( raw_ostream &OS, - const ArrayRef<const CodeGenInstruction *> &NumberedInstructions) { + ArrayRef<const CodeGenInstruction *> NumberedInstructions) { // First lets organize all opcodes by (prefix) byte. Prefix 0 is the // starting table. std::map<unsigned, std::map<unsigned, std::pair<unsigned, const CodeGenInstruction *>>> OpcodeTable; for (unsigned I = 0; I != NumberedInstructions.size(); ++I) { - auto &CGI = *NumberedInstructions[I]; - auto &Def = *CGI.TheDef; + const CodeGenInstruction &CGI = *NumberedInstructions[I]; + const Record &Def = *CGI.TheDef; if (!Def.getValue("Inst")) continue; - auto &Inst = *Def.getValueAsBitsInit("Inst"); - RecordKeeper &RK = Inst.getRecordKeeper(); - unsigned Opc = static_cast<unsigned>( - cast<IntInit>(Inst.convertInitializerTo(IntRecTy::get(RK))) - ->getValue()); + const BitsInit &Inst = *Def.getValueAsBitsInit("Inst"); + unsigned Opc = static_cast<unsigned>(*Inst.convertInitializerToInt()); if (Opc == 0xFFFFFFFF) continue; // No opcode defined. assert(Opc <= 0xFFFFFF); @@ -97,14 +92,14 @@ void emitWebAssemblyDisassemblerTables( OS << "};\n\n"; std::vector<std::string> OperandTable, CurOperandList; // Output one table per prefix. 
- for (auto &PrefixPair : OpcodeTable) { - if (PrefixPair.second.empty()) + for (const auto &[Prefix, Table] : OpcodeTable) { + if (Table.empty()) continue; - OS << "WebAssemblyInstruction InstructionTable" << PrefixPair.first; + OS << "WebAssemblyInstruction InstructionTable" << Prefix; OS << "[] = {\n"; for (unsigned I = 0; I < WebAssemblyInstructionTableSize; I++) { - auto InstIt = PrefixPair.second.find(I); - if (InstIt != PrefixPair.second.end()) { + auto InstIt = Table.find(I); + if (InstIt != Table.end()) { // Regular instruction. assert(InstIt->second.second); auto &CGI = *InstIt->second.second; @@ -144,7 +139,7 @@ void emitWebAssemblyDisassemblerTables( } else { auto PrefixIt = OpcodeTable.find(I); // If we have a non-empty table for it that's not 0, this is a prefix. - if (PrefixIt != OpcodeTable.end() && I && !PrefixPair.first) { + if (PrefixIt != OpcodeTable.end() && I && !Prefix) { OS << " { 0, ET_Prefix, 0, 0"; } else { OS << " { 0, ET_Unused, 0, 0"; @@ -163,15 +158,11 @@ void emitWebAssemblyDisassemblerTables( // Create a table of all extension tables: OS << "struct { uint8_t Prefix; const WebAssemblyInstruction *Table; }\n"; OS << "PrefixTable[] = {\n"; - for (auto &PrefixPair : OpcodeTable) { - if (PrefixPair.second.empty() || !PrefixPair.first) + for (const auto &[Prefix, Table] : OpcodeTable) { + if (Table.empty() || !Prefix) continue; - OS << " { " << PrefixPair.first << ", InstructionTable" - << PrefixPair.first; - OS << " },\n"; + OS << " { " << Prefix << ", InstructionTable" << Prefix << " },\n"; } OS << " { 0, nullptr }\n};\n\n"; OS << "} // end namespace llvm\n"; } - -} // namespace llvm diff --git a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h index aba3a4bfd302..2d814cf0675a 100644 --- a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h +++ b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h @@ -22,8 +22,7 @@ class CodeGenInstruction; class raw_ostream; void 
emitWebAssemblyDisassemblerTables( - raw_ostream &OS, - const ArrayRef<const CodeGenInstruction *> &NumberedInstructions); + raw_ostream &OS, ArrayRef<const CodeGenInstruction *> NumberedInstructions); } // namespace llvm diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp index 8952c8e0a1c6..dfa10f74974c 100644 --- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp +++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp @@ -63,8 +63,8 @@ static bool isExplicitUnalign(const CodeGenInstruction *Inst) { } class X86FoldTablesEmitter { - RecordKeeper &Records; - CodeGenTarget Target; + const RecordKeeper &Records; + const CodeGenTarget Target; // Represents an entry in the folding table class X86FoldTableEntry { @@ -196,7 +196,7 @@ class X86FoldTablesEmitter { FoldTable BroadcastTable4; public: - X86FoldTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {} + X86FoldTablesEmitter(const RecordKeeper &R) : Records(R), Target(R) {} // run - Generate the 6 X86 memory fold tables. void run(raw_ostream &OS); @@ -670,7 +670,7 @@ void X86FoldTablesEmitter::run(raw_ostream &OS) { // added into memory fold tables. auto RegInstsForBroadcast = RegInsts; - Record *AsmWriter = Target.getAsmWriter(); + const Record *AsmWriter = Target.getAsmWriter(); unsigned Variant = AsmWriter->getValueAsInt("Variant"); auto FixUp = [&](const CodeGenInstruction *RegInst) { StringRef RegInstName = RegInst->TheDef->getName(); @@ -721,8 +721,8 @@ void X86FoldTablesEmitter::run(raw_ostream &OS) { // Add the manually mapped instructions listed above. 
for (const ManualMapEntry &Entry : ManualMapSet) { - Record *RegInstIter = Records.getDef(Entry.RegInstStr); - Record *MemInstIter = Records.getDef(Entry.MemInstStr); + const Record *RegInstIter = Records.getDef(Entry.RegInstStr); + const Record *MemInstIter = Records.getDef(Entry.MemInstStr); updateTables(&(Target.getInstruction(RegInstIter)), &(Target.getInstruction(MemInstIter)), Entry.Strategy, true); diff --git a/llvm/utils/TableGen/X86InstrMappingEmitter.cpp b/llvm/utils/TableGen/X86InstrMappingEmitter.cpp index 0abe353a9a57..f68c727cbe92 100644 --- a/llvm/utils/TableGen/X86InstrMappingEmitter.cpp +++ b/llvm/utils/TableGen/X86InstrMappingEmitter.cpp @@ -26,8 +26,8 @@ using namespace X86Disassembler; namespace { class X86InstrMappingEmitter { - RecordKeeper &Records; - CodeGenTarget Target; + const RecordKeeper &Records; + const CodeGenTarget Target; // Hold all pontentially compressible EVEX instructions std::vector<const CodeGenInstruction *> PreCompressionInsts; @@ -44,7 +44,7 @@ class X86InstrMappingEmitter { PredicateInstMap PredicateInsts; public: - X86InstrMappingEmitter(RecordKeeper &R) : Records(R), Target(R) {} + X86InstrMappingEmitter(const RecordKeeper &R) : Records(R), Target(R) {} // run - Output X86 EVEX compression tables. void run(raw_ostream &OS); @@ -63,8 +63,8 @@ private: void printClassDef(raw_ostream &OS); // Prints the given table as a C++ array of type X86TableEntry under the guard // \p Macro. 
- void printTable(const std::vector<Entry> &Table, StringRef Name, - StringRef Macro, raw_ostream &OS); + void printTable(ArrayRef<Entry> Table, StringRef Name, StringRef Macro, + raw_ostream &OS); }; void X86InstrMappingEmitter::printClassDef(raw_ostream &OS) { @@ -90,9 +90,8 @@ static void printMacroEnd(StringRef Macro, raw_ostream &OS) { OS << "#endif // " << Macro << "\n\n"; } -void X86InstrMappingEmitter::printTable(const std::vector<Entry> &Table, - StringRef Name, StringRef Macro, - raw_ostream &OS) { +void X86InstrMappingEmitter::printTable(ArrayRef<Entry> Table, StringRef Name, + StringRef Macro, raw_ostream &OS) { printMacroBegin(Macro, OS); OS << "static const X86TableEntry " << Name << "[] = {\n"; @@ -220,7 +219,7 @@ void X86InstrMappingEmitter::emitCompressEVEXTable( assert(NewRec && "Instruction not found!"); NewInst = &Target.getInstruction(NewRec); } else if (Name.ends_with("_EVEX")) { - if (auto *NewRec = Records.getDef(Name.drop_back(5))) + if (const auto *NewRec = Records.getDef(Name.drop_back(5))) NewInst = &Target.getInstruction(NewRec); } else if (Name.ends_with("_ND")) // Leave it to ND2NONND table. 
@@ -319,7 +318,7 @@ void X86InstrMappingEmitter::emitND2NonNDTable( if (!isInteresting(Rec) || NoCompressSet.find(Name) != NoCompressSet.end()) continue; if (ManualMap.find(Name) != ManualMap.end()) { - auto *NewRec = Records.getDef(ManualMap.at(Rec->getName())); + const auto *NewRec = Records.getDef(ManualMap.at(Rec->getName())); assert(NewRec && "Instruction not found!"); auto &NewInst = Target.getInstruction(NewRec); Table.push_back(std::pair(Inst, &NewInst)); @@ -328,10 +327,10 @@ void X86InstrMappingEmitter::emitND2NonNDTable( if (!Name.ends_with("_ND")) continue; - auto *NewRec = Records.getDef(Name.drop_back(3)); + const auto *NewRec = Records.getDef(Name.drop_back(3)); if (!NewRec) continue; - auto &NewInst = Target.getInstruction(NewRec); + const auto &NewInst = Target.getInstruction(NewRec); if (isRegisterOperand(NewInst.Operands[0].Rec)) Table.push_back(std::pair(Inst, &NewInst)); } @@ -353,15 +352,15 @@ void X86InstrMappingEmitter::emitSSE2AVXTable( if (!isInteresting(Rec)) continue; if (ManualMap.find(Name) != ManualMap.end()) { - auto *NewRec = Records.getDef(ManualMap.at(Rec->getName())); + const auto *NewRec = Records.getDef(ManualMap.at(Rec->getName())); assert(NewRec && "Instruction not found!"); - auto &NewInst = Target.getInstruction(NewRec); + const auto &NewInst = Target.getInstruction(NewRec); Table.push_back(std::pair(Inst, &NewInst)); continue; } std::string NewName = ("V" + Name).str(); - auto *AVXRec = Records.getDef(NewName); + const auto *AVXRec = Records.getDef(NewName); if (!AVXRec) continue; auto &AVXInst = Target.getInstruction(AVXRec); diff --git a/llvm/utils/TableGen/X86ManualInstrMapping.def b/llvm/utils/TableGen/X86ManualInstrMapping.def index d76c404722b0..bc539d792f38 100644 --- a/llvm/utils/TableGen/X86ManualInstrMapping.def +++ b/llvm/utils/TableGen/X86ManualInstrMapping.def @@ -32,6 +32,7 @@ NOCOMP(VPSRAQZ128ri) NOCOMP(VPSRAQZ128rm) NOCOMP(VPSRAQZ128rr) NOCOMP(VSCALEFPSZ128rm) +NOCOMP(VMOVZPDILo2PDIZrr) 
NOCOMP(VDBPSADBWZ256rmi) NOCOMP(VDBPSADBWZ256rri) NOCOMP(VPMAXSQZ256rm) diff --git a/llvm/utils/TableGen/X86MnemonicTables.cpp b/llvm/utils/TableGen/X86MnemonicTables.cpp index d9ceed40f7c7..ddbfb2af9869 100644 --- a/llvm/utils/TableGen/X86MnemonicTables.cpp +++ b/llvm/utils/TableGen/X86MnemonicTables.cpp @@ -22,10 +22,10 @@ using namespace llvm; namespace { class X86MnemonicTablesEmitter { - CodeGenTarget Target; + const CodeGenTarget Target; public: - X86MnemonicTablesEmitter(RecordKeeper &R) : Target(R) {} + X86MnemonicTablesEmitter(const RecordKeeper &R) : Target(R) {} // Output X86 mnemonic tables. void run(raw_ostream &OS); @@ -34,15 +34,13 @@ public: void X86MnemonicTablesEmitter::run(raw_ostream &OS) { emitSourceFileHeader("X86 Mnemonic tables", OS); OS << "namespace llvm {\nnamespace X86 {\n\n"; - Record *AsmWriter = Target.getAsmWriter(); + const Record *AsmWriter = Target.getAsmWriter(); unsigned Variant = AsmWriter->getValueAsInt("Variant"); // Hold all instructions grouped by mnemonic StringMap<SmallVector<const CodeGenInstruction *, 0>> MnemonicToCGInstrMap; - ArrayRef<const CodeGenInstruction *> NumberedInstructions = - Target.getInstructionsByEnumValue(); - for (const CodeGenInstruction *I : NumberedInstructions) { + for (const CodeGenInstruction *I : Target.getInstructionsByEnumValue()) { const Record *Def = I->TheDef; // Filter non-X86 instructions. if (!Def->isSubClassOf("X86Inst")) diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp index a1e67e3ea692..4386e8361712 100644 --- a/llvm/utils/TableGen/X86RecognizableInstr.cpp +++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp @@ -154,14 +154,13 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables, UID(uid), Spec(&tables.specForUID(uid)) { // Check for 64-bit inst which does not require REX // FIXME: Is there some better way to check for In64BitMode? 
- std::vector<Record *> Predicates = Rec->getValueAsListOfDefs("Predicates"); - for (unsigned i = 0, e = Predicates.size(); i != e; ++i) { - if (Predicates[i]->getName().contains("Not64Bit") || - Predicates[i]->getName().contains("In32Bit")) { + for (const Record *Predicate : Rec->getValueAsListOfConstDefs("Predicates")) { + if (Predicate->getName().contains("Not64Bit") || + Predicate->getName().contains("In32Bit")) { Is32Bit = true; break; } - if (Predicates[i]->getName().contains("In64Bit")) { + if (Predicate->getName().contains("In64Bit")) { Is64Bit = true; break; } diff --git a/llvm/utils/gn/secondary/clang-tools-extra/test/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/test/BUILD.gn index 2227ad42cf40..4f9ba335859b 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/test/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/test/BUILD.gn @@ -83,7 +83,6 @@ group("test") { "//clang-tools-extra/unittests", "//clang/lib/Headers", "//clang/tools/c-index-test", - "//clang/tools/clang-rename", "//clang/tools/driver:symlinks", "//llvm/tools/llvm-bcanalyzer", "//llvm/utils/FileCheck", diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn index 87a2e771dda6..cba7867854df 100644 --- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn @@ -142,6 +142,7 @@ copy("Headers") { "avx10_2_512satcvtintrin.h", "avx10_2bf16intrin.h", "avx10_2convertintrin.h", + "avx10_2copyintrin.h", "avx10_2minmaxintrin.h", "avx10_2niintrin.h", "avx10_2satcvtdsintrin.h", diff --git a/llvm/utils/gn/secondary/clang/test/BUILD.gn b/llvm/utils/gn/secondary/clang/test/BUILD.gn index 1ec94a419f56..1d5b8025a12a 100644 --- a/llvm/utils/gn/secondary/clang/test/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/test/BUILD.gn @@ -171,7 +171,6 @@ group("test") { "//clang/tools/clang-installapi", "//clang/tools/clang-offload-bundler", "//clang/tools/clang-refactor", - 
"//clang/tools/clang-rename", "//clang/tools/clang-repl", "//clang/tools/clang-scan-deps", "//clang/tools/diagtool", diff --git a/llvm/utils/gn/secondary/clang/tools/clang-rename/BUILD.gn b/llvm/utils/gn/secondary/clang/tools/clang-rename/BUILD.gn deleted file mode 100644 index 1c517b989345..000000000000 --- a/llvm/utils/gn/secondary/clang/tools/clang-rename/BUILD.gn +++ /dev/null @@ -1,14 +0,0 @@ -executable("clang-rename") { - configs += [ "//llvm/utils/gn/build:clang_code" ] - deps = [ - "//clang/lib/Basic", - "//clang/lib/Frontend", - "//clang/lib/Rewrite", - "//clang/lib/Tooling", - "//clang/lib/Tooling/Core", - "//clang/lib/Tooling/Refactoring", - "//llvm/lib/Option", - "//llvm/lib/Support", - ] - sources = [ "ClangRename.cpp" ] -} diff --git a/llvm/utils/gn/secondary/clang/unittests/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/BUILD.gn index a6a4a5708341..4aa844ac5a3c 100644 --- a/llvm/utils/gn/secondary/clang/unittests/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/BUILD.gn @@ -16,7 +16,6 @@ group("unittests") { "InstallAPI:InstallAPITests", "Interpreter:ClangReplInterpreterTests", "Lex:LexTests", - "Rename:ClangRenameTests", "Rewrite:RewriteTests", "Sema:SemaTests", "Serialization:SerializationTests", diff --git a/llvm/utils/gn/secondary/clang/unittests/Rename/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Rename/BUILD.gn deleted file mode 100644 index 10c922424186..000000000000 --- a/llvm/utils/gn/secondary/clang/unittests/Rename/BUILD.gn +++ /dev/null @@ -1,28 +0,0 @@ -import("//third-party/unittest/unittest.gni") - -unittest("ClangRenameTests") { - configs += [ "//llvm/utils/gn/build:clang_code" ] - - # We'd like clang/unittests/Tooling/RewriterTestContext.h in the test. - include_dirs = [ "../.." 
] - - deps = [ - "//clang/lib/AST", - "//clang/lib/ASTMatchers", - "//clang/lib/Basic", - "//clang/lib/Format", - "//clang/lib/Frontend", - "//clang/lib/Rewrite", - "//clang/lib/Tooling", - "//clang/lib/Tooling/Core", - "//clang/lib/Tooling/Refactoring", - "//llvm/lib/Support", - ] - sources = [ - "RenameAliasTest.cpp", - "RenameClassTest.cpp", - "RenameEnumTest.cpp", - "RenameFunctionTest.cpp", - "RenameMemberTest.cpp", - ] -} diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn index 1e2c0cdc8630..2fd3b9a7dd3d 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn @@ -275,8 +275,8 @@ static_library("builtins") { sources -= [ "fp_mode.c" ] sources += [ "cpu_model/x86.c", - "i386/fp_mode.c", "extendbfsf2.c", + "i386/fp_mode.c", "truncdfbf2.c", "truncsfbf2.c", ] diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn index 02532f63dd67..8a7bb4a27923 100644 --- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn @@ -101,7 +101,6 @@ write_cmake_config("config") { "HAVE_PTHREAD_SET_NAME_NP=", "HAVE_SIGNAL_H=1", "HAVE_SYS_STAT_H=1", - "HAVE_SYS_TYPES_H=1", "HAVE_VALGRIND_VALGRIND_H=", "HAVE__ALLOCA=", "HAVE___ALLOCA=", @@ -228,7 +227,6 @@ write_cmake_config("config") { "HAVE_SYSCONF=", "HAVE_SYS_IOCTL_H=", "HAVE_SYS_MMAN_H=", - "HAVE_SYS_PARAM_H=", "HAVE_SYS_RESOURCE_H=", "HAVE_SYS_TIME_H=", "HAVE_TERMIOS_H=", @@ -264,7 +262,6 @@ write_cmake_config("config") { "HAVE_SYSCONF=1", "HAVE_SYS_IOCTL_H=1", "HAVE_SYS_MMAN_H=1", - "HAVE_SYS_PARAM_H=1", "HAVE_SYS_RESOURCE_H=1", "HAVE_SYS_TIME_H=1", "HAVE_TERMIOS_H=1", diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn index 
0900872d4449..853cf341e284 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn @@ -15,6 +15,7 @@ static_library("Vectorize") { "SLPVectorizer.cpp", "SandboxVectorizer/DependencyGraph.cpp", "SandboxVectorizer/Passes/BottomUpVec.cpp", + "SandboxVectorizer/Region.cpp", "SandboxVectorizer/SandboxVectorizer.cpp", "VPlan.cpp", "VPlanAnalysis.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn index f9f519cf65da..a91cb838c5e2 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn @@ -9,5 +9,9 @@ unittest("SandboxVectorizerTests") { "//llvm/lib/TargetParser", "//llvm/lib/Transforms/Vectorize", ] - sources = [ "DependencyGraphTest.cpp" ] + sources = [ + "DependencyGraphTest.cpp", + "LegalityTest.cpp", + "RegionTest.cpp", + ] } diff --git a/llvm/utils/yaml-bench/YAMLBench.cpp b/llvm/utils/yaml-bench/YAMLBench.cpp index 50e55538a011..4dc6caeb6fdb 100644 --- a/llvm/utils/yaml-bench/YAMLBench.cpp +++ b/llvm/utils/yaml-bench/YAMLBench.cpp @@ -56,17 +56,6 @@ cl::opt<cl::boolOrDefault> UseColor("use-color", cl::desc("Emit colored output (default=autodetect)"), cl::init(cl::BOU_UNSET)); -struct indent { - unsigned distance; - indent(unsigned d) : distance(d) {} -}; - -static raw_ostream &operator <<(raw_ostream &os, const indent &in) { - for (unsigned i = 0; i < in.distance; ++i) - os << " "; - return os; -} - /// Pretty print a tag by replacing tag:yaml.org,2002: with !!. 
static std::string prettyTag(yaml::Node *N) { std::string Tag = N->getVerbatimTag(); diff --git a/mlir/include/mlir-c/Pass.h b/mlir/include/mlir-c/Pass.h index 35db138305d1..2218ec0f47d1 100644 --- a/mlir/include/mlir-c/Pass.h +++ b/mlir/include/mlir-c/Pass.h @@ -74,9 +74,11 @@ mlirPassManagerGetAsOpPassManager(MlirPassManager passManager); MLIR_CAPI_EXPORTED MlirLogicalResult mlirPassManagerRunOnOp(MlirPassManager passManager, MlirOperation op); -/// Enable mlir-print-ir-after-all. -MLIR_CAPI_EXPORTED void -mlirPassManagerEnableIRPrinting(MlirPassManager passManager); +/// Enable IR printing. +MLIR_CAPI_EXPORTED void mlirPassManagerEnableIRPrinting( + MlirPassManager passManager, bool printBeforeAll, bool printAfterAll, + bool printModuleScope, bool printAfterOnlyOnChange, + bool printAfterOnlyOnFailure); /// Enable / disable verify-each. MLIR_CAPI_EXPORTED void diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h index 67baa8777a6f..8eb711962583 100644 --- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h @@ -73,10 +73,20 @@ void populateGpuBreakDownSubgroupReducePatterns( /// Collect a set of patterns to lower `gpu.subgroup_reduce` into `gpu.shuffle` /// ops over `shuffleBitwidth` scalar types. Assumes that the subgroup has /// `subgroupSize` lanes. Uses the butterfly shuffle algorithm. +/// +/// The patterns populated by this function will ignore ops with the +/// `cluster_size` attribute. +/// `populateGpuLowerClusteredSubgroupReduceToShufflePatterns` is the opposite. void populateGpuLowerSubgroupReduceToShufflePatterns( RewritePatternSet &patterns, unsigned subgroupSize, unsigned shuffleBitwidth = 32, PatternBenefit benefit = 1); +/// Disjoint counterpart of `populateGpuLowerSubgroupReduceToShufflePatterns` +/// that only matches `gpu.subgroup_reduce` ops with a `cluster_size`. 
+void populateGpuLowerClusteredSubgroupReduceToShufflePatterns( + RewritePatternSet &patterns, unsigned subgroupSize, + unsigned shuffleBitwidth = 32, PatternBenefit benefit = 1); + /// Collect all patterns to rewrite ops within the GPU dialect. inline void populateGpuRewritePatterns(RewritePatternSet &patterns) { populateGpuAllReducePatterns(patterns); diff --git a/mlir/lib/Bindings/Python/Pass.cpp b/mlir/lib/Bindings/Python/Pass.cpp index a68421b61641..1d0e5ce2115a 100644 --- a/mlir/lib/Bindings/Python/Pass.cpp +++ b/mlir/lib/Bindings/Python/Pass.cpp @@ -74,10 +74,17 @@ void mlir::python::populatePassManagerSubmodule(py::module &m) { "Releases (leaks) the backing pass manager (testing)") .def( "enable_ir_printing", - [](PyPassManager &passManager) { - mlirPassManagerEnableIRPrinting(passManager.get()); + [](PyPassManager &passManager, bool printBeforeAll, + bool printAfterAll, bool printModuleScope, bool printAfterChange, + bool printAfterFailure) { + mlirPassManagerEnableIRPrinting( + passManager.get(), printBeforeAll, printAfterAll, + printModuleScope, printAfterChange, printAfterFailure); }, - "Enable mlir-print-ir-after-all.") + "print_before_all"_a = false, "print_after_all"_a = true, + "print_module_scope"_a = false, "print_after_change"_a = false, + "print_after_failure"_a = false, + "Enable IR printing, default as mlir-print-ir-after-all.") .def( "enable_verifier", [](PyPassManager &passManager, bool enable) { diff --git a/mlir/lib/CAPI/IR/Pass.cpp b/mlir/lib/CAPI/IR/Pass.cpp index d242baae99c0..a6c9fbd08d45 100644 --- a/mlir/lib/CAPI/IR/Pass.cpp +++ b/mlir/lib/CAPI/IR/Pass.cpp @@ -44,8 +44,21 @@ MlirLogicalResult mlirPassManagerRunOnOp(MlirPassManager passManager, return wrap(unwrap(passManager)->run(unwrap(op))); } -void mlirPassManagerEnableIRPrinting(MlirPassManager passManager) { - return unwrap(passManager)->enableIRPrinting(); +void mlirPassManagerEnableIRPrinting(MlirPassManager passManager, + bool printBeforeAll, bool printAfterAll, + bool 
printModuleScope, + bool printAfterOnlyOnChange, + bool printAfterOnlyOnFailure) { + auto shouldPrintBeforePass = [printBeforeAll](Pass *, Operation *) { + return printBeforeAll; + }; + auto shouldPrintAfterPass = [printAfterAll](Pass *, Operation *) { + return printAfterAll; + }; + return unwrap(passManager) + ->enableIRPrinting(shouldPrintBeforePass, shouldPrintAfterPass, + printModuleScope, printAfterOnlyOnChange, + printAfterOnlyOnFailure); } void mlirPassManagerEnableVerifier(MlirPassManager passManager, bool enable) { diff --git a/mlir/lib/Dialect/GPU/Transforms/SubgroupReduceLowering.cpp b/mlir/lib/Dialect/GPU/Transforms/SubgroupReduceLowering.cpp index b166f1cd469a..185f824351a2 100644 --- a/mlir/lib/Dialect/GPU/Transforms/SubgroupReduceLowering.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/SubgroupReduceLowering.cpp @@ -210,13 +210,21 @@ Value createSubgroupShuffleReduction(OpBuilder &builder, Location loc, struct ScalarSubgroupReduceToShuffles final : OpRewritePattern<gpu::SubgroupReduceOp> { ScalarSubgroupReduceToShuffles(MLIRContext *ctx, unsigned subgroupSize, - unsigned shuffleBitwidth, + unsigned shuffleBitwidth, bool matchClustered, PatternBenefit benefit) : OpRewritePattern(ctx, benefit), subgroupSize(subgroupSize), - shuffleBitwidth(shuffleBitwidth) {} + shuffleBitwidth(shuffleBitwidth), matchClustered(matchClustered) {} LogicalResult matchAndRewrite(gpu::SubgroupReduceOp op, PatternRewriter &rewriter) const override { + if (op.getClusterSize().has_value() != matchClustered) { + return rewriter.notifyMatchFailure( + op, llvm::formatv("op is {0}clustered but pattern is configured to " + "only match {1}clustered ops", + matchClustered ? "non-" : "", + matchClustered ? 
"" : "non-")); + } + auto ci = getAndValidateClusterInfo(op, subgroupSize); if (failed(ci)) return failure(); @@ -262,19 +270,28 @@ struct ScalarSubgroupReduceToShuffles final private: unsigned subgroupSize = 0; unsigned shuffleBitwidth = 0; + bool matchClustered = false; }; /// Lowers vector gpu subgroup reductions to a series of shuffles. struct VectorSubgroupReduceToShuffles final : OpRewritePattern<gpu::SubgroupReduceOp> { VectorSubgroupReduceToShuffles(MLIRContext *ctx, unsigned subgroupSize, - unsigned shuffleBitwidth, + unsigned shuffleBitwidth, bool matchClustered, PatternBenefit benefit) : OpRewritePattern(ctx, benefit), subgroupSize(subgroupSize), - shuffleBitwidth(shuffleBitwidth) {} + shuffleBitwidth(shuffleBitwidth), matchClustered(matchClustered) {} LogicalResult matchAndRewrite(gpu::SubgroupReduceOp op, PatternRewriter &rewriter) const override { + if (op.getClusterSize().has_value() != matchClustered) { + return rewriter.notifyMatchFailure( + op, llvm::formatv("op is {0}clustered but pattern is configured to " + "only match {1}clustered ops", + matchClustered ? "non-" : "", + matchClustered ? 
"" : "non-")); + } + auto ci = getAndValidateClusterInfo(op, subgroupSize); if (failed(ci)) return failure(); @@ -343,6 +360,7 @@ struct VectorSubgroupReduceToShuffles final private: unsigned subgroupSize = 0; unsigned shuffleBitwidth = 0; + bool matchClustered = false; }; } // namespace @@ -358,5 +376,14 @@ void mlir::populateGpuLowerSubgroupReduceToShufflePatterns( RewritePatternSet &patterns, unsigned subgroupSize, unsigned shuffleBitwidth, PatternBenefit benefit) { patterns.add<ScalarSubgroupReduceToShuffles, VectorSubgroupReduceToShuffles>( - patterns.getContext(), subgroupSize, shuffleBitwidth, benefit); + patterns.getContext(), subgroupSize, shuffleBitwidth, + /*matchClustered=*/false, benefit); +} + +void mlir::populateGpuLowerClusteredSubgroupReduceToShufflePatterns( + RewritePatternSet &patterns, unsigned subgroupSize, + unsigned shuffleBitwidth, PatternBenefit benefit) { + patterns.add<ScalarSubgroupReduceToShuffles, VectorSubgroupReduceToShuffles>( + patterns.getContext(), subgroupSize, shuffleBitwidth, + /*matchClustered=*/true, benefit); } diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp index c7ed158aabb6..32182c083a8a 100644 --- a/mlir/lib/IR/AsmPrinter.cpp +++ b/mlir/lib/IR/AsmPrinter.cpp @@ -584,9 +584,8 @@ private: struct InProgressAliasInfo { InProgressAliasInfo() : aliasDepth(0), isType(false), canBeDeferred(false) {} - InProgressAliasInfo(StringRef alias, bool isType, bool canBeDeferred) - : alias(alias), aliasDepth(1), isType(isType), - canBeDeferred(canBeDeferred) {} + InProgressAliasInfo(StringRef alias) + : alias(alias), aliasDepth(1), isType(false), canBeDeferred(false) {} bool operator<(const InProgressAliasInfo &rhs) const { // Order first by depth, then by attr/type kind, and then by name. @@ -1096,6 +1095,8 @@ std::pair<size_t, size_t> AliasInitializer::visitImpl( // Try to generate an alias for this value. 
generateAlias(value, it->second, canBeDeferred); + it->second.isType = std::is_base_of_v<Type, T>; + it->second.canBeDeferred = canBeDeferred; // Print the value, capturing any nested elements that require aliases. SmallVector<size_t> childAliases; @@ -1153,8 +1154,7 @@ void AliasInitializer::generateAlias(T symbol, InProgressAliasInfo &alias, sanitizeIdentifier(nameBuffer, tempBuffer, /*allowedPunctChars=*/"$_-", /*allowTrailingDigit=*/false); name = name.copy(aliasAllocator); - alias = InProgressAliasInfo(name, /*isType=*/std::is_base_of_v<Type, T>, - canBeDeferred); + alias = InProgressAliasInfo(name); } //===----------------------------------------------------------------------===// diff --git a/mlir/python/mlir/_mlir_libs/_mlir/passmanager.pyi b/mlir/python/mlir/_mlir_libs/_mlir/passmanager.pyi index c072d5e0fb86..5d115e8222d7 100644 --- a/mlir/python/mlir/_mlir_libs/_mlir/passmanager.pyi +++ b/mlir/python/mlir/_mlir_libs/_mlir/passmanager.pyi @@ -16,7 +16,14 @@ class PassManager: def __init__(self, context: Optional[_ir.Context] = None) -> None: ... def _CAPICreate(self) -> object: ... def _testing_release(self) -> None: ... - def enable_ir_printing(self) -> None: ... + def enable_ir_printing( + self, + print_before_all: bool = False, + print_after_all: bool = True, + print_module_scope: bool = False, + print_after_change: bool = False, + print_after_failure: bool = False, + ) -> None: ... def enable_verifier(self, enable: bool) -> None: ... @staticmethod def parse(pipeline: str, context: Optional[_ir.Context] = None) -> PassManager: ... diff --git a/mlir/test/IR/print-attr-type-aliases.mlir b/mlir/test/IR/print-attr-type-aliases.mlir index 27c5a75addbb..e878d862076c 100644 --- a/mlir/test/IR/print-attr-type-aliases.mlir +++ b/mlir/test/IR/print-attr-type-aliases.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt %s -split-input-file | FileCheck %s +// RUN: mlir-opt %s -split-input-file -mlir-print-debuginfo | FileCheck %s // Verify printer of type & attr aliases. 
-// RUN: mlir-opt %s -split-input-file | mlir-opt -split-input-file | FileCheck %s +// RUN: mlir-opt %s -split-input-file -mlir-print-debuginfo | mlir-opt -split-input-file -mlir-print-debuginfo | FileCheck %s // CHECK-DAG: #test2Ealias = "alias_test:dot_in_name" "test.op"() {alias_test = "alias_test:dot_in_name"} : () -> () @@ -32,16 +32,16 @@ // CHECK-DAG: tensor<32x!test_ui8_> "test.op"() : () -> tensor<32x!test.int<unsigned, 8>> -// CHECK-DAG: #loc = loc("nested") -// CHECK-DAG: #loc1 = loc("test.mlir":10:8) -// CHECK-DAG: #loc2 = loc(fused<#loc>[#loc1]) +// CHECK-DAG: #[[LOC_NESTED:.+]] = loc("nested") +// CHECK-DAG: #[[LOC_RAW:.+]] = loc("test.mlir":10:8) +// CHECK-DAG: = loc(fused<#[[LOC_NESTED]]>[#[[LOC_RAW]]]) "test.op"() {alias_test = loc(fused<loc("nested")>["test.mlir":10:8])} : () -> () // ----- // Check proper ordering of intermixed attribute/type aliases. // CHECK: !tuple = tuple< -// CHECK: #loc1 = loc(fused<!tuple +// CHECK: = loc(fused<!tuple "test.op"() {alias_test = loc(fused<tuple<i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32>>["test.mlir":10:8])} : () -> () // ----- @@ -54,7 +54,7 @@ // ----- // Check that we don't print aliases for things that aren't printed. -// CHECK: #loc1 = loc(fused<memref<1xi32> +// CHECK: = loc(fused<memref<1xi32> // CHECK-NOT: #map "test.op"() {alias_test = loc(fused<memref<1xi32, affine_map<(d0) -> (d0)>>>["test.mlir":10:8])} : () -> () @@ -71,3 +71,16 @@ "test.op"() {attr = #test.conditional_alias<#unalias_me>} : () -> () // CHECK-NEXT: #test.conditional_alias<#test2Ealias> "test.op"() {attr = #test.conditional_alias<#keep_aliased>} : () -> () + +// ----- + +// Check that a deferred no_alias attr can be un-deferred. 
+ +#keep_aliased = "alias_test:dot_in_name" +#cond_alias = #test.conditional_alias<#keep_aliased> +#no_alias = loc(fused<#cond_alias>["test.mlir":1:1]) + +// CHECK: #[[TEST_ALIAS:.+]] = "alias_test:dot_in_name" +// CHECK: fused<#test.conditional_alias<#[[TEST_ALIAS]]> +// CHECK: "test.op" +"test.op"() {attr = #no_alias} : () -> () loc(fused<#no_alias>["test.mlir":0:0]) diff --git a/mlir/test/lib/Dialect/GPU/TestGpuRewrite.cpp b/mlir/test/lib/Dialect/GPU/TestGpuRewrite.cpp index 99a914506b01..74d057c0b7b6 100644 --- a/mlir/test/lib/Dialect/GPU/TestGpuRewrite.cpp +++ b/mlir/test/lib/Dialect/GPU/TestGpuRewrite.cpp @@ -78,9 +78,12 @@ struct TestGpuSubgroupReduceLoweringPass populateGpuBreakDownSubgroupReducePatterns(patterns, /*maxShuffleBitwidth=*/32, PatternBenefit(2)); - if (expandToShuffles) + if (expandToShuffles) { populateGpuLowerSubgroupReduceToShufflePatterns( patterns, /*subgroupSize=*/32, /*shuffleBitwidth=*/32); + populateGpuLowerClusteredSubgroupReduceToShufflePatterns( + patterns, /*subgroupSize=*/32, /*shuffleBitwidth=*/32); + } (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); } diff --git a/mlir/test/python/execution_engine.py b/mlir/test/python/execution_engine.py index 1cdda63eefe3..7c375ce81de0 100644 --- a/mlir/test/python/execution_engine.py +++ b/mlir/test/python/execution_engine.py @@ -1,4 +1,4 @@ -# RUN: %PYTHON %s 2>&1 | FileCheck %s +# RUN: env MLIR_RUNNER_UTILS=%mlir_runner_utils MLIR_C_RUNNER_UTILS=%mlir_c_runner_utils %PYTHON %s 2>&1 | FileCheck %s # REQUIRES: host-supports-jit import gc, sys, os, tempfile from mlir.ir import * @@ -7,6 +7,12 @@ from mlir.execution_engine import * from mlir.runtime import * from ml_dtypes import bfloat16, float8_e5m2 +MLIR_RUNNER_UTILS = os.getenv( + "MLIR_RUNNER_UTILS", "../../../../lib/libmlir_runner_utils.so" +) +MLIR_C_RUNNER_UTILS = os.getenv( + "MLIR_C_RUNNER_UTILS", "../../../../lib/libmlir_c_runner_utils.so" +) # Log everything to stderr and flush so that we have a unified 
stream to match # errors/info emitted by MLIR to stderr. @@ -700,8 +706,8 @@ def testSharedLibLoad(): ] else: shared_libs = [ - "../../../../lib/libmlir_runner_utils.so", - "../../../../lib/libmlir_c_runner_utils.so", + MLIR_RUNNER_UTILS, + MLIR_C_RUNNER_UTILS, ] execution_engine = ExecutionEngine( @@ -743,8 +749,8 @@ def testNanoTime(): ] else: shared_libs = [ - "../../../../lib/libmlir_runner_utils.so", - "../../../../lib/libmlir_c_runner_utils.so", + MLIR_RUNNER_UTILS, + MLIR_C_RUNNER_UTILS, ] execution_engine = ExecutionEngine( diff --git a/mlir/test/python/pass_manager.py b/mlir/test/python/pass_manager.py index 43af80b53166..749670325623 100644 --- a/mlir/test/python/pass_manager.py +++ b/mlir/test/python/pass_manager.py @@ -300,14 +300,40 @@ def testPrintIrAfterAll(): pm = PassManager.parse("builtin.module(canonicalize)") ctx.enable_multithreading(False) pm.enable_ir_printing() - # CHECK: // -----// IR Dump Before Canonicalizer (canonicalize) ('builtin.module' operation) //----- // + # CHECK: // -----// IR Dump After Canonicalizer (canonicalize) //----- // + # CHECK: module { + # CHECK: func.func @main() { + # CHECK: return + # CHECK: } + # CHECK: } + pm.run(module) + + +# CHECK-LABEL: TEST: testPrintIrBeforeAndAfterAll +@run +def testPrintIrBeforeAndAfterAll(): + with Context() as ctx: + module = ModuleOp.parse( + """ + module { + func.func @main() { + %0 = arith.constant 10 + return + } + } + """ + ) + pm = PassManager.parse("builtin.module(canonicalize)") + ctx.enable_multithreading(False) + pm.enable_ir_printing(print_before_all=True, print_after_all=True) + # CHECK: // -----// IR Dump Before Canonicalizer (canonicalize) //----- // # CHECK: module { # CHECK: func.func @main() { # CHECK: %[[C10:.*]] = arith.constant 10 : i64 # CHECK: return # CHECK: } # CHECK: } - # CHECK: // -----// IR Dump After Canonicalizer (canonicalize) ('builtin.module' operation) //----- // + # CHECK: // -----// IR Dump After Canonicalizer (canonicalize) //----- // # CHECK: module 
{ # CHECK: func.func @main() { # CHECK: return diff --git a/mlir/test/tblgen-to-irdl/CMathDialect.td b/mlir/test/tblgen-to-irdl/CMathDialect.td index 454543e074c4..abda7ca41e9d 100644 --- a/mlir/test/tblgen-to-irdl/CMathDialect.td +++ b/mlir/test/tblgen-to-irdl/CMathDialect.td @@ -19,12 +19,14 @@ class CMath_Op<string mnemonic, list<Trait> traits = []> def f32Orf64Type : Or<[CPred<"::llvm::isa<::mlir::F32>">, CPred<"::llvm::isa<::mlir::F64>">]>; +// CHECK: irdl.type @"!complex" def CMath_ComplexType : CMath_Type<"ComplexType", "complex"> { let parameters = (ins f32Orf64Type:$elementType); + let assemblyFormat = "`<` $elementType `>`"; } // CHECK: irdl.operation @identity { -// CHECK-NEXT: %0 = irdl.base "!cmath.complex" +// CHECK-NEXT: %0 = irdl.base @cmath::@"!complex" // CHECK-NEXT: irdl.results(%0) // CHECK-NEXT: } def CMath_IdentityOp : CMath_Op<"identity"> { @@ -32,9 +34,9 @@ def CMath_IdentityOp : CMath_Op<"identity"> { } // CHECK: irdl.operation @mul { -// CHECK-NEXT: %0 = irdl.base "!cmath.complex" -// CHECK-NEXT: %1 = irdl.base "!cmath.complex" -// CHECK-NEXT: %2 = irdl.base "!cmath.complex" +// CHECK-NEXT: %0 = irdl.base @cmath::@"!complex" +// CHECK-NEXT: %1 = irdl.base @cmath::@"!complex" +// CHECK-NEXT: %2 = irdl.base @cmath::@"!complex" // CHECK-NEXT: irdl.operands(%0, %1) // CHECK-NEXT: irdl.results(%2) // CHECK-NEXT: } @@ -45,7 +47,7 @@ def CMath_MulOp : CMath_Op<"mul"> { // CHECK: irdl.operation @norm { // CHECK-NEXT: %0 = irdl.any -// CHECK-NEXT: %1 = irdl.base "!cmath.complex" +// CHECK-NEXT: %1 = irdl.base @cmath::@"!complex" // CHECK-NEXT: irdl.operands(%0) // CHECK-NEXT: irdl.results(%1) // CHECK-NEXT: } diff --git a/mlir/test/tblgen-to-irdl/TestDialect.td b/mlir/test/tblgen-to-irdl/TestDialect.td index 2622c8177607..4fea3d8576e9 100644 --- a/mlir/test/tblgen-to-irdl/TestDialect.td +++ b/mlir/test/tblgen-to-irdl/TestDialect.td @@ -16,8 +16,11 @@ class Test_Type<string name, string typeMnemonic, list<Trait> traits = []> class Test_Op<string 
mnemonic, list<Trait> traits = []> : Op<Test_Dialect, mnemonic, traits>; +// CHECK: irdl.type @"!singleton_a" def Test_SingletonAType : Test_Type<"SingletonAType", "singleton_a"> {} +// CHECK: irdl.type @"!singleton_b" def Test_SingletonBType : Test_Type<"SingletonBType", "singleton_b"> {} +// CHECK: irdl.type @"!singleton_c" def Test_SingletonCType : Test_Type<"SingletonCType", "singleton_c"> {} @@ -26,7 +29,7 @@ def Test_AndOp : Test_Op<"and"> { let arguments = (ins AllOfType<[Test_SingletonAType, AnyType]>:$in); } // CHECK-LABEL: irdl.operation @and { -// CHECK-NEXT: %[[v0:[^ ]*]] = irdl.base "!test.singleton_a" +// CHECK-NEXT: %[[v0:[^ ]*]] = irdl.base @test::@"!singleton_a" // CHECK-NEXT: %[[v1:[^ ]*]] = irdl.any // CHECK-NEXT: %[[v2:[^ ]*]] = irdl.all_of(%[[v0]], %[[v1]]) // CHECK-NEXT: irdl.operands(%[[v2]]) @@ -79,9 +82,9 @@ def Test_OrOp : Test_Op<"or"> { let arguments = (ins AnyTypeOf<[Test_SingletonAType, Test_SingletonBType, Test_SingletonCType]>:$in); } // CHECK-LABEL: irdl.operation @or { -// CHECK-NEXT: %[[v0:[^ ]*]] = irdl.base "!test.singleton_a" -// CHECK-NEXT: %[[v1:[^ ]*]] = irdl.base "!test.singleton_b" -// CHECK-NEXT: %[[v2:[^ ]*]] = irdl.base "!test.singleton_c" +// CHECK-NEXT: %[[v0:[^ ]*]] = irdl.base @test::@"!singleton_a" +// CHECK-NEXT: %[[v1:[^ ]*]] = irdl.base @test::@"!singleton_b" +// CHECK-NEXT: %[[v2:[^ ]*]] = irdl.base @test::@"!singleton_c" // CHECK-NEXT: %[[v3:[^ ]*]] = irdl.any_of(%[[v0]], %[[v1]], %[[v2]]) // CHECK-NEXT: irdl.operands(%[[v3]]) // CHECK-NEXT: } @@ -114,8 +117,8 @@ def Test_VariadicityOp : Test_Op<"variadicity"> { Test_SingletonCType:$required); } // CHECK-LABEL: irdl.operation @variadicity { -// CHECK-NEXT: %[[v0:[^ ]*]] = irdl.base "!test.singleton_a" -// CHECK-NEXT: %[[v1:[^ ]*]] = irdl.base "!test.singleton_b" -// CHECK-NEXT: %[[v2:[^ ]*]] = irdl.base "!test.singleton_c" +// CHECK-NEXT: %[[v0:[^ ]*]] = irdl.base @test::@"!singleton_a" +// CHECK-NEXT: %[[v1:[^ ]*]] = irdl.base @test::@"!singleton_b" +// 
CHECK-NEXT: %[[v2:[^ ]*]] = irdl.base @test::@"!singleton_c" // CHECK-NEXT: irdl.operands(variadic %[[v0]], optional %[[v1]], %[[v2]]) // CHECK-NEXT: } diff --git a/mlir/tools/tblgen-to-irdl/OpDefinitionsGen.cpp b/mlir/tools/tblgen-to-irdl/OpDefinitionsGen.cpp index dd0d98de496e..45957bafc378 100644 --- a/mlir/tools/tblgen-to-irdl/OpDefinitionsGen.cpp +++ b/mlir/tools/tblgen-to-irdl/OpDefinitionsGen.cpp @@ -177,6 +177,15 @@ Value createConstraint(OpBuilder &builder, tblgen::Constraint constraint) { } if (predRec.isSubClassOf("TypeDef")) { + auto dialect = predRec.getValueAsDef("dialect")->getValueAsString("name"); + if (dialect == selectedDialect) { + std::string combined = ("!" + predRec.getValueAsString("mnemonic")).str(); + SmallVector<FlatSymbolRefAttr> nested = { + SymbolRefAttr::get(ctx, combined)}; + auto typeSymbol = SymbolRefAttr::get(ctx, dialect, nested); + auto op = builder.create<irdl::BaseOp>(UnknownLoc::get(ctx), typeSymbol); + return op.getOutput(); + } std::string typeName = ("!" + predRec.getValueAsString("typeName")).str(); auto op = builder.create<irdl::BaseOp>(UnknownLoc::get(ctx), StringAttr::get(ctx, typeName)); @@ -250,6 +259,12 @@ static StringRef getOperatorName(tblgen::Operator &tblgenOp) { return opName; } +/// Returns the name of the type without the dialect prefix. +static StringRef getTypeName(tblgen::TypeDef &tblgenType) { + StringRef opName = tblgenType.getDef()->getValueAsString("mnemonic"); + return opName; +} + /// Extract an operation to IRDL. irdl::OperationOp createIRDLOperation(OpBuilder &builder, tblgen::Operator &tblgenOp) { @@ -300,6 +315,19 @@ irdl::OperationOp createIRDLOperation(OpBuilder &builder, return op; } +irdl::TypeOp createIRDLType(OpBuilder &builder, tblgen::TypeDef &tblgenType) { + MLIRContext *ctx = builder.getContext(); + StringRef typeName = getTypeName(tblgenType); + std::string combined = ("!" 
+ typeName).str(); + + irdl::TypeOp op = builder.create<irdl::TypeOp>( + UnknownLoc::get(ctx), StringAttr::get(ctx, combined)); + + op.getBody().emplaceBlock(); + + return op; +} + static irdl::DialectOp createIRDLDialect(OpBuilder &builder) { MLIRContext *ctx = builder.getContext(); return builder.create<irdl::DialectOp>(UnknownLoc::get(ctx), @@ -322,6 +350,14 @@ static bool emitDialectIRDLDefs(const RecordKeeper &recordKeeper, // Set insertion point to start of DialectOp. builder = builder.atBlockBegin(&dialect.getBody().emplaceBlock()); + for (const Record *type : + recordKeeper.getAllDerivedDefinitionsIfDefined("TypeDef")) { + tblgen::TypeDef tblgenType(type); + if (tblgenType.getDialect().getName() != selectedDialect) + continue; + createIRDLType(builder, tblgenType); + } + for (const Record *def : recordKeeper.getAllDerivedDefinitionsIfDefined("Op")) { tblgen::Operator tblgenOp(def); diff --git a/offload/test/lit.cfg b/offload/test/lit.cfg index 9ddef42cf903..514bb89e0b64 100644 --- a/offload/test/lit.cfg +++ b/offload/test/lit.cfg @@ -182,7 +182,12 @@ def remove_suffix_if_present(name): return name def add_libraries(source): - return source + " " + config.llvm_library_intdir + "/libomptarget.devicertl.a" + if config.libomptarget_has_libc: + return source + " -Xoffload-linker " + "-lc " + \ + "-Xoffload-linker " + "-lm " + \ + config.llvm_library_intdir + "/libomptarget.devicertl.a" + else: + return source + " " + config.llvm_library_intdir + "/libomptarget.devicertl.a" # Add platform targets host_targets = [ diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index f106694841ce..2dd54b5116d9 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -426,7 +426,11 @@ if(WIN32) endforeach() else() - install(TARGETS omp ${export_to_llvmexports} ${LIBOMP_INSTALL_KIND} DESTINATION "${OPENMP_INSTALL_LIBDIR}") + if(${CMAKE_SYSTEM_NAME} MATCHES "AIX") + install(FILES ${LIBOMP_LIBRARY_DIR}/libomp.a 
DESTINATION "${OPENMP_INSTALL_LIBDIR}" COMPONENT runtime) + else() + install(TARGETS omp ${export_to_llvmexports} ${LIBOMP_INSTALL_KIND} DESTINATION "${OPENMP_INSTALL_LIBDIR}") + endif() if(${LIBOMP_INSTALL_ALIASES}) # Create aliases (symlinks) of the library for backwards compatibility diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index 1d0ba8bd4d58..b39fb8f6795e 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -26,7 +26,6 @@ exports_files([ "tools/clang-format/clang-format.el", "tools/clang-format/clang-format-test.el", "tools/clang-format/clang-format.py", - "tools/clang-rename/clang-rename.el", "tools/extra/clang-include-fixer/tool/clang-include-fixer.el", "tools/extra/clang-include-fixer/tool/clang-include-fixer-test.el", ]) @@ -2591,20 +2590,6 @@ cc_binary( ) cc_binary( - name = "clang-rename", - srcs = glob(["tools/clang-rename/*.cpp"]), - stamp = 0, - deps = [ - ":basic", - ":frontend", - ":rewrite", - ":tooling", - ":tooling_refactoring", - "//llvm:Support", - ], -) - -cc_binary( name = "clang-repl", srcs = glob(["tools/clang-repl/*.cpp"]), stamp = 0, diff --git a/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel index 884a6055cf4e..e8c7106b2875 100644 --- a/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel @@ -299,32 +299,6 @@ cc_library( ) cc_test( - name = "rename_tests", - size = "small", - timeout = "moderate", - srcs = glob( - [ - "Rename/*.cpp", - "Rename/*.h", - ], - allow_empty = False, - ), - shard_count = 20, - deps = [ - ":rename_tests_tooling_hdrs", - "//clang:ast_matchers", - "//clang:basic", - "//clang:format", - "//clang:frontend", - "//clang:tooling", - "//clang:tooling_refactoring", - "//llvm:Support", - "//third-party/unittest:gtest", - 
"//third-party/unittest:gtest_main", - ], -) - -cc_test( name = "rewrite_tests", size = "small", srcs = glob( diff --git a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h index 15696c346bff..74b4eca0889a 100644 --- a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h +++ b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h @@ -198,9 +198,6 @@ /* Define to 1 if you have the <sys/mman.h> header file. */ #define HAVE_SYS_MMAN_H 1 -/* Define to 1 if you have the <sys/param.h> header file. */ -#define HAVE_SYS_PARAM_H 1 - /* Define to 1 if you have the <sys/resource.h> header file. */ #define HAVE_SYS_RESOURCE_H 1 @@ -216,9 +213,6 @@ /* Define to 1 if stat struct has st_mtim member. */ /* HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC defined in Bazel */ -/* Define to 1 if you have the <sys/types.h> header file. */ -#define HAVE_SYS_TYPES_H 1 - /* Define to 1 if you have the <termios.h> header file. */ #define HAVE_TERMIOS_H 1 diff --git a/utils/bazel/llvm_configs/config.h.cmake b/utils/bazel/llvm_configs/config.h.cmake index d71ff40144c0..4c9404d95daf 100644 --- a/utils/bazel/llvm_configs/config.h.cmake +++ b/utils/bazel/llvm_configs/config.h.cmake @@ -191,9 +191,6 @@ /* Define to 1 if you have the <sys/mman.h> header file. */ #cmakedefine HAVE_SYS_MMAN_H ${HAVE_SYS_MMAN_H} -/* Define to 1 if you have the <sys/param.h> header file. */ -#cmakedefine HAVE_SYS_PARAM_H ${HAVE_SYS_PARAM_H} - /* Define to 1 if you have the <sys/resource.h> header file. */ #cmakedefine HAVE_SYS_RESOURCE_H ${HAVE_SYS_RESOURCE_H} @@ -209,9 +206,6 @@ /* Define to 1 if stat struct has st_mtim member. */ #cmakedefine HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC ${HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC} -/* Define to 1 if you have the <sys/types.h> header file. */ -#cmakedefine HAVE_SYS_TYPES_H ${HAVE_SYS_TYPES_H} - /* Define to 1 if you have the <termios.h> header file. 
*/ #cmakedefine HAVE_TERMIOS_H ${HAVE_TERMIOS_H} |
