diff options
341 files changed, 8136 insertions, 2173 deletions
diff --git a/.github/copilot-instructions.md b/.github/instructions/llvm.instructions.md index 03748938700e..3f1308f51e67 100644 --- a/.github/copilot-instructions.md +++ b/.github/instructions/llvm.instructions.md @@ -1,3 +1,7 @@ +--- +applyTo: llvm/**/* +--- + When performing a code review, pay close attention to code modifying a function's control flow. Could the change result in the corruption of performance profile data? Could the change result in invalid debug information, in particular for diff --git a/.github/workflows/release-binaries-all.yml b/.github/workflows/release-binaries-all.yml index 0b52a08202f1..eef49b5e3625 100644 --- a/.github/workflows/release-binaries-all.yml +++ b/.github/workflows/release-binaries-all.yml @@ -90,7 +90,6 @@ jobs: runs-on: - ubuntu-22.04 - ubuntu-22.04-arm - - macos-13 - macos-14 uses: ./.github/workflows/release-binaries.yml diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml index 814592626525..fa73b9d9fe8d 100644 --- a/.github/workflows/release-binaries.yml +++ b/.github/workflows/release-binaries.yml @@ -21,7 +21,6 @@ on: options: - ubuntu-22.04 - ubuntu-22.04-arm - - macos-13 - macos-14 workflow_call: @@ -130,8 +129,6 @@ jobs: target_cmake_flags="$target_cmake_flags -DBOOTSTRAP_BOOTSTRAP_COMPILER_RT_ENABLE_IOS=OFF" if [ "$RUNNER_ARCH" = "ARM64" ]; then arches=arm64 - else - arches=x86_64 fi target_cmake_flags="$target_cmake_flags -DBOOTSTRAP_BOOTSTRAP_DARWIN_osx_ARCHS=$arches -DBOOTSTRAP_BOOTSTRAP_DARWIN_osx_BUILTIN_ARCHS=$arches" fi @@ -147,14 +144,6 @@ jobs: build_runs_on="depot-${{ inputs.runs-on }}-16" test_runs_on=$build_runs_on ;; - macos-13) - if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then - build_runs_on="${{ inputs.runs-on }}" - else - build_runs_on="macos-13-large" - fi - test_runs_on="${{ inputs.runs-on }}" - ;; macos-14) if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then build_runs_on="${{ inputs.runs-on }}" diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 84023efe1084..fbe186454351 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1699,9 +1699,19 @@ bool BinaryFunction::scanExternalRefs() { const uint64_t FunctionOffset = TargetAddress - TargetFunction->getAddress(); - BranchTargetSymbol = - FunctionOffset ? TargetFunction->addEntryPointAtOffset(FunctionOffset) - : TargetFunction->getSymbol(); + if (!TargetFunction->isInConstantIsland(TargetAddress)) { + BranchTargetSymbol = + FunctionOffset + ? TargetFunction->addEntryPointAtOffset(FunctionOffset) + : TargetFunction->getSymbol(); + } else { + TargetFunction->setIgnored(); + BC.outs() << "BOLT-WARNING: Ignoring entry point at address 0x" + << Twine::utohexstr(Address) + << " in constant island of function " << *TargetFunction + << '\n'; + continue; + } } // Can't find more references. Not creating relocations since we are not diff --git a/bolt/test/AArch64/constant-island-entry.s b/bolt/test/AArch64/constant-island-entry.s index 6567114eb980..7f8449deea13 100644 --- a/bolt/test/AArch64/constant-island-entry.s +++ b/bolt/test/AArch64/constant-island-entry.s @@ -1,10 +1,15 @@ -// This test checks that we ignore functions which add an entry point that -// is in a constant island. +## This test checks that we ignore functions which add an entry point that +## is in a constant island. # RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o # RUN: %clang %cflags %t.o -pie -Wl,-q -o %t.exe + +## Check when the caller is successfully disassembled. # RUN: llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s +## Skip caller to check the identical warning is triggered from ScanExternalRefs(). +# RUN: llvm-bolt %t.exe -o %t.bolt -skip-funcs=caller 2>&1 | FileCheck %s + # CHECK: BOLT-WARNING: Ignoring entry point at address 0x{{[0-9a-f]+}} in constant island of function func .globl func diff --git a/clang-tools-extra/clang-tidy/readability/RedundantParenthesesCheck.cpp b/clang-tools-extra/clang-tidy/readability/RedundantParenthesesCheck.cpp index 0ab59fff39d8..874b9618bd88 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantParenthesesCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/RedundantParenthesesCheck.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "RedundantParenthesesCheck.h" +#include "../utils/Matchers.h" +#include "../utils/OptionsUtils.h" #include "clang/AST/Expr.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" @@ -32,15 +34,30 @@ AST_MATCHER(ParenExpr, isInMacro) { } // namespace +RedundantParenthesesCheck::RedundantParenthesesCheck(StringRef Name, + ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + AllowedDecls(utils::options::parseStringList( + Options.get("AllowedDecls", "std::max;std::min"))) {} + +void RedundantParenthesesCheck::storeOptions( + ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "AllowedDecls", + utils::options::serializeStringList(AllowedDecls)); +} + void RedundantParenthesesCheck::registerMatchers(MatchFinder *Finder) { const auto ConstantExpr = expr(anyOf(integerLiteral(), floatLiteral(), characterLiteral(), cxxBoolLiteral(), stringLiteral(), cxxNullPtrLiteralExpr())); Finder->addMatcher( - parenExpr(subExpr(anyOf(parenExpr(), ConstantExpr, declRefExpr())), - unless(anyOf(isInMacro(), - // sizeof(...) is common used. - hasParent(unaryExprOrTypeTraitExpr())))) + parenExpr( + subExpr(anyOf(parenExpr(), ConstantExpr, + declRefExpr(to(namedDecl(unless( + matchers::matchesAnyListedName(AllowedDecls))))))), + unless(anyOf(isInMacro(), + // sizeof(...) is common used. + hasParent(unaryExprOrTypeTraitExpr())))) .bind("dup"), this); } diff --git a/clang-tools-extra/clang-tidy/readability/RedundantParenthesesCheck.h b/clang-tools-extra/clang-tidy/readability/RedundantParenthesesCheck.h index 9a0409b83fff..2638a09730f7 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantParenthesesCheck.h +++ b/clang-tools-extra/clang-tidy/readability/RedundantParenthesesCheck.h @@ -20,13 +20,16 @@ namespace clang::tidy::readability { /// https://clang.llvm.org/extra/clang-tidy/checks/readability/redundant-parentheses.html class RedundantParenthesesCheck : public ClangTidyCheck { public: - RedundantParenthesesCheck(StringRef Name, ClangTidyContext *Context) - : ClangTidyCheck(Name, Context) {} + RedundantParenthesesCheck(StringRef Name, ClangTidyContext *Context); + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; void registerMatchers(ast_matchers::MatchFinder *Finder) override; void check(const ast_matchers::MatchFinder::MatchResult &Result) override; bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { return LangOpts.CPlusPlus | LangOpts.C99; } + +private: + const std::vector<StringRef> AllowedDecls; }; } // namespace clang::tidy::readability diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability/redundant-parentheses.rst b/clang-tools-extra/docs/clang-tidy/checks/readability/redundant-parentheses.rst index 23d975e64649..20e3891c72d7 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/readability/redundant-parentheses.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/readability/redundant-parentheses.rst @@ -27,3 +27,16 @@ affect the semantics. .. code-block:: c++ int a = (1 * 2) + 3; // no warning + +Options +------- + +.. option:: AllowedDecls + + Semicolon-separated list of regular expressions matching names of declarations + to ignore when the parentheses are around. Declarations can include variables + or functions. The default is an `std::max;std::min`. + + Some STL library functions may have the same name as widely used function-like + macro. For example, ``std::max`` and ``max`` macro. A workaround to distinguish + them is adding parentheses around functions to prevent function-like macro. diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-parentheses.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-parentheses.cpp index 926cb118c77c..c77608c66469 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-parentheses.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-parentheses.cpp @@ -62,3 +62,12 @@ void exceptions() { // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: redundant parentheses around expression [readability-redundant-parentheses] // CHECK-FIXES: alignof(3); } + +namespace std { + template<class T> T max(T, T); + template<class T> T min(T, T); +} // namespace std +void ignoreStdMaxMin() { + (std::max)(1,2); + (std::min)(1,2); +} diff --git a/clang/AreaTeamMembers.txt b/clang/AreaTeamMembers.txt index 964d11e79f69..2928943f4753 100644 --- a/clang/AreaTeamMembers.txt +++ b/clang/AreaTeamMembers.txt @@ -13,5 +13,5 @@ rnk@google.com (email), rnk (Discourse), rnk (GitHub), rnk (Discord) Other Members ------------- Eli Friedman -efriedma@quicinc.com> (email), efriedma-quic (Discourse), efriedma-quic (GitHub) +efriedma@qti.qualcomm.com> (email), efriedma-quic (Discourse), efriedma-quic (GitHub) diff --git a/clang/Maintainers.rst b/clang/Maintainers.rst index 8fb2201aae16..1d16ea9fe563 100644 --- a/clang/Maintainers.rst +++ b/clang/Maintainers.rst @@ -46,7 +46,7 @@ Clang LLVM IR generation | rjmccall\@apple.com (email), rjmccall (Phabricator), rjmccall (GitHub) | Eli Friedman -| efriedma\@quicinc.com (email), efriedma (Phabricator), efriedma-quic (GitHub) +| efriedma\@qti.qualcomm.com (email), efriedma (Phabricator), efriedma-quic (GitHub) | Anton Korobeynikov | anton\@korobeynikov.info (email), asl (Phabricator), asl (GitHub) diff --git a/clang/bindings/python/clang/cindex.py b/clang/bindings/python/clang/cindex.py index 2786add27f5e..c48bc9c2eb7d 100644 --- a/clang/bindings/python/clang/cindex.py +++ b/clang/bindings/python/clang/cindex.py @@ -2363,6 +2363,13 @@ class Cursor(Structure): return conf.lib.clang_getFieldDeclBitWidth(self) # type: ignore [no-any-return] @cursor_null_guard + def is_function_inlined(self) -> bool: + """ + Check if the function is inlined. + """ + return bool(conf.lib.clang_Cursor_isFunctionInlined(self)) + + @cursor_null_guard def has_attrs(self) -> bool: """ Determine whether the given cursor has any attributes. @@ -4310,6 +4317,7 @@ FUNCTION_LIST: list[LibFunc] = [ ("clang_Cursor_isAnonymous", [Cursor], bool), ("clang_Cursor_isAnonymousRecordDecl", [Cursor], bool), ("clang_Cursor_isBitField", [Cursor], bool), + ("clang_Cursor_isFunctionInlined", [Cursor], c_uint), ("clang_Location_isInSystemHeader", [SourceLocation], bool), ("clang_PrintingPolicy_dispose", [PrintingPolicy]), ("clang_PrintingPolicy_getProperty", [PrintingPolicy, c_int], c_uint), diff --git a/clang/bindings/python/tests/cindex/test_cursor.py b/clang/bindings/python/tests/cindex/test_cursor.py index eb0d1d50601a..7cb616a7ef14 100644 --- a/clang/bindings/python/tests/cindex/test_cursor.py +++ b/clang/bindings/python/tests/cindex/test_cursor.py @@ -784,6 +784,21 @@ int count(int a, int b){ cursor = get_cursor(tu, "reg") self.assertEqual(cursor.storage_class, StorageClass.REGISTER) + def test_function_inlined(self): + tu = get_tu( + """ +inline void f_inline(void); +void f_noninline(void); +int d_noninline; +""" + ) + cursor = get_cursor(tu, "f_inline") + self.assertEqual(cursor.is_function_inlined(), True) + cursor = get_cursor(tu, "f_noninline") + self.assertEqual(cursor.is_function_inlined(), False) + cursor = get_cursor(tu, "d_noninline") + self.assertEqual(cursor.is_function_inlined(), False) + def test_availability(self): tu = get_tu("class A { A(A const&) = delete; };", lang="cpp") diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 570cab262c11..0b4a4849f6cc 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -197,57 +197,29 @@ the configuration (without a prefix: ``Auto``). .. _AlignAfterOpenBracket: -**AlignAfterOpenBracket** (``BracketAlignmentStyle``) :versionbadge:`clang-format 3.8` :ref:`¶ <AlignAfterOpenBracket>` +**AlignAfterOpenBracket** (``Boolean``) :versionbadge:`clang-format 3.8` :ref:`¶ <AlignAfterOpenBracket>` If ``true``, horizontally aligns arguments after an open bracket. - This applies to round brackets (parentheses), angle brackets and square - brackets. - - Possible values: - - * ``BAS_Align`` (in configuration: ``Align``) - Align parameters on the open bracket, e.g.: - - .. code-block:: c++ - - someLongFunction(argument1, - argument2); - - * ``BAS_DontAlign`` (in configuration: ``DontAlign``) - Don't align, instead use ``ContinuationIndentWidth``, e.g.: - - .. code-block:: c++ - - someLongFunction(argument1, - argument2); - - * ``BAS_AlwaysBreak`` (in configuration: ``AlwaysBreak``) - Always break after an open bracket, if the parameters don't fit - on a single line, e.g.: - - .. code-block:: c++ - someLongFunction( - argument1, argument2); - - * ``BAS_BlockIndent`` (in configuration: ``BlockIndent``) - Always break after an open bracket, if the parameters don't fit - on a single line. Closing brackets will be placed on a new line. - E.g.: - - .. code-block:: c++ + .. code-block:: c++ - someLongFunction( - argument1, argument2 - ) + true: vs. false + someLongFunction(argument1, someLongFunction(argument1, + argument2); argument2); - .. note:: - - This currently only applies to braced initializer lists (when - ``Cpp11BracedListStyle`` is not ``Block``) and parentheses. + .. note:: + As of clang-format 22 this option is a bool with the previous + option of ``Align`` replaced with ``true``, ``DontAlign`` replaced + with ``false``, and the options of ``AlwaysBreak`` and ``BlockIndent`` + replaced with ``true`` and with setting of new style options using + ``BreakAfterOpenBracketBracedList``, ``BreakAfterOpenBracketFunction``, + ``BreakAfterOpenBracketIf``, ``BreakBeforeCloseBracketBracedList``, + ``BreakBeforeCloseBracketFunction``, and ``BreakBeforeCloseBracketIf``. + This applies to round brackets (parentheses), angle brackets and square + brackets. .. _AlignArrayOfStructures: @@ -2746,6 +2718,67 @@ the configuration (without a prefix: ``Auto``). @Mock DataLoad loader; +.. _BreakAfterOpenBracketBracedList: + +**BreakAfterOpenBracketBracedList** (``Boolean``) :versionbadge:`clang-format 22` :ref:`¶ <BreakAfterOpenBracketBracedList>` + Force break after the left bracket of a braced initializer list (when + ``Cpp11BracedListStyle`` is ``true``) when the list exceeds the column + limit. + + .. code-block:: c++ + + true: false: + vector<int> x { vs. vector<int> x {1, + 1, 2, 3} 2, 3} + +.. _BreakAfterOpenBracketFunction: + +**BreakAfterOpenBracketFunction** (``Boolean``) :versionbadge:`clang-format 22` :ref:`¶ <BreakAfterOpenBracketFunction>` + Force break after the left parenthesis of a function (declaration, + definition, call) when the parameters exceed the column limit. + + .. code-block:: c++ + + true: false: + foo ( vs. foo (a, + a , b) b) + +.. _BreakAfterOpenBracketIf: + +**BreakAfterOpenBracketIf** (``Boolean``) :versionbadge:`clang-format 22` :ref:`¶ <BreakAfterOpenBracketIf>` + Force break after the left parenthesis of an if control statement + when the expression exceeds the column limit. + + .. code-block:: c++ + + true: false: + if constexpr ( vs. if constexpr (a || + a || b) b) + +.. _BreakAfterOpenBracketLoop: + +**BreakAfterOpenBracketLoop** (``Boolean``) :versionbadge:`clang-format 22` :ref:`¶ <BreakAfterOpenBracketLoop>` + Force break after the left parenthesis of a loop control statement + when the expression exceeds the column limit. + + .. code-block:: c++ + + true: false: + while ( vs. while (a && + a && b) { b) { + +.. _BreakAfterOpenBracketSwitch: + +**BreakAfterOpenBracketSwitch** (``Boolean``) :versionbadge:`clang-format 22` :ref:`¶ <BreakAfterOpenBracketSwitch>` + Force break after the left parenthesis of a switch control statement + when the expression exceeds the column limit. + + .. code-block:: c++ + + true: false: + switch ( vs. switch (a + + a + b) { b) { + .. _BreakAfterReturnType: **BreakAfterReturnType** (``ReturnTypeBreakingStyle``) :versionbadge:`clang-format 19` :ref:`¶ <BreakAfterReturnType>` @@ -3383,6 +3416,79 @@ the configuration (without a prefix: ``Auto``). +.. _BreakBeforeCloseBracketBracedList: + +**BreakBeforeCloseBracketBracedList** (``Boolean``) :versionbadge:`clang-format 22` :ref:`¶ <BreakBeforeCloseBracketBracedList>` + Force break before the right bracket of a braced initializer list (when + ``Cpp11BracedListStyle`` is ``true``) when the list exceeds the column + limit. The break before the right bracket is only made if there is a + break after the opening bracket. + + .. code-block:: c++ + + true: false: + vector<int> x { vs. vector<int> x { + 1, 2, 3 1, 2, 3} + } + +.. _BreakBeforeCloseBracketFunction: + +**BreakBeforeCloseBracketFunction** (``Boolean``) :versionbadge:`clang-format 22` :ref:`¶ <BreakBeforeCloseBracketFunction>` + Force break before the right parenthesis of a function (declaration, + definition, call) when the parameters exceed the column limit. + + .. code-block:: c++ + + true: false: + foo ( vs. foo ( + a , b a , b) + ) + +.. _BreakBeforeCloseBracketIf: + +**BreakBeforeCloseBracketIf** (``Boolean``) :versionbadge:`clang-format 22` :ref:`¶ <BreakBeforeCloseBracketIf>` + Force break before the right parenthesis of an if control statement + when the expression exceeds the column limit. The break before the + closing parenthesis is only made if there is a break after the opening + parenthesis. + + .. code-block:: c++ + + true: false: + if constexpr ( vs. if constexpr ( + a || b a || b ) + ) + +.. _BreakBeforeCloseBracketLoop: + +**BreakBeforeCloseBracketLoop** (``Boolean``) :versionbadge:`clang-format 22` :ref:`¶ <BreakBeforeCloseBracketLoop>` + Force break before the right parenthesis of a loop control statement + when the expression exceeds the column limit. The break before the + closing parenthesis is only made if there is a break after the opening + parenthesis. + + .. code-block:: c++ + + true: false: + while ( vs. while ( + a && b a && b) { + ) { + +.. _BreakBeforeCloseBracketSwitch: + +**BreakBeforeCloseBracketSwitch** (``Boolean``) :versionbadge:`clang-format 22` :ref:`¶ <BreakBeforeCloseBracketSwitch>` + Force break before the right parenthesis of a switch control statement + when the expression exceeds the column limit. The break before the + closing parenthesis is only made if there is a break after the opening + parenthesis. + + .. code-block:: c++ + + true: false: + switch ( vs. switch ( + a + b a + b) { + ) { + .. _BreakBeforeConceptDeclarations: **BreakBeforeConceptDeclarations** (``BreakBeforeConceptDeclarationsStyle``) :versionbadge:`clang-format 12` :ref:`¶ <BreakBeforeConceptDeclarations>` diff --git a/clang/docs/Modules.rst b/clang/docs/Modules.rst index acbe45e0be97..e45ee9ff9eac 100644 --- a/clang/docs/Modules.rst +++ b/clang/docs/Modules.rst @@ -421,13 +421,7 @@ As an example, the module map file for the C standard library might look a bit l .. parsed-literal:: - module std [system] [extern_c] { - module assert { - textual header "assert.h" - header "bits/assert-decls.h" - export * - } - + module std [system] { module complex { header "complex.h" export * @@ -440,7 +434,6 @@ As an example, the module map file for the C standard library might look a bit l module errno { header "errno.h" - header "sys/errno.h" export * } @@ -673,14 +666,14 @@ of checking *use-declaration*\s, and must still be a lexically-valid header file. In the future, we intend to pre-tokenize such headers and include the token sequence within the prebuilt module representation. -A header with the ``exclude`` specifier is excluded from the module. It will not be included when the module is built, nor will it be considered to be part of the module, even if an ``umbrella`` header or directory would otherwise make it part of the module. +A header with the ``exclude`` specifier is excluded from the module. It will not be included when the module is built, nor will it be considered to be part of the module, even if an ``umbrella`` directory would otherwise make it part of the module. -**Example:** The C header ``assert.h`` is an excellent candidate for a textual header, because it is meant to be included multiple times (possibly with different ``NDEBUG`` settings). However, declarations within it should typically be split into a separate modular header. +**Example:** A "X macro" header is an excellent candidate for a textual header, because it is can't be compiled standalone, and by itself does not contain any declarations. .. parsed-literal:: - module std [system] { - textual header "assert.h" + module MyLib [system] { + textual header "xmacros.h" } A given header shall not be referenced by more than one *header-declaration*. diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 8435f367029a..73aaaad8b32e 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -613,6 +613,14 @@ clang-format literals. - Add ``Leave`` suboption to ``IndentPPDirectives``. - Add ``AllowBreakBeforeQtProperty`` option. +- Add ``BreakAfterOpenBracketBracedList'', ``BreakAfterOpenBracketFunction'', + ``BreakAfterOpenBracketIf``, ``BreakAfterOpenBracketLoop``, + ``BreakAfterOpenBracketSwitch``, ``BreakBeforeCloseBracketBracedList'', + ``BreakBeforeCloseBracketFunction``, ``BreakBeforeCloseBracketIf``, + ``BreakBeforeCloseBracketLoop``, ``BreakBeforeCloseBracketSwitch`` options. +- Deprecate ``AlwaysBreak`` and ``BlockIndent`` suboptions from the + ``AlignAfterOpenBracket`` option, and make ``AlignAfterOpenBracket`` a + ``bool`` type. libclang -------- @@ -651,6 +659,7 @@ Sanitizers Python Binding Changes ---------------------- +- Exposed ``clang_Cursor_isFunctionInlined``. - Exposed ``clang_getCursorLanguage`` via ``Cursor.language``. - Add all missing ``CursorKind``s, ``TypeKind``s and ``ExceptionSpecificationKind``s from ``Index.h`` diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index b856ad145824..3a5b72e20afa 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -43,6 +43,7 @@ // SJ -> sigjmp_buf // K -> ucontext_t // p -> pid_t +// e -> _Float16 for HIP/C++ and __fp16 for OpenCL // . -> "...". This may only occur at the end of the function list. // // Types may be prefixed with the following modifiers: diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index f265d82efee7..36cb527a9c80 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -967,6 +967,47 @@ TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f32_f32, "V4fifffQtV4ibii", "n TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f16_f32, "V4hifffQtV4ibii", "nc", "image-insts") TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f32_f32, "V4fifffQtV4ibii", "nc", "image-insts") TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f16_f32, "V4hifffQtV4ibii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_1d_v4f32_f32, "V4fifQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_1d_v4f16_f32, "V4eifQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32, "V4fiffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32, "V4eiffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2d_f32_f32, "fiffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2d_v4f32_f32, "V4fiffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2d_v4f16_f32, "V4eiffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2darray_f32_f32, "fifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_3d_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_3d_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_cube_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_cube_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_1d_v4f32_f32, "V4fiffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_1d_v4f16_f32, "V4eiffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_1darray_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_1darray_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2d_f32_f32, "fifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2d_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2d_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2darray_f32_f32, "fiffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2darray_v4f32_f32, "V4fiffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2darray_v4f16_f32, "V4eiffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_3d_v4f32_f32, "V4fiffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_3d_v4f16_f32, "V4eiffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_cube_v4f32_f32, "V4fiffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_cube_v4f16_f32, "V4eiffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_1d_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_1d_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_1darray_v4f32_f32, "V4fiffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_1darray_v4f16_f32, "V4eiffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2d_f32_f32, "fiffffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2d_v4f32_f32, "V4fiffffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2d_v4f16_f32, "V4eiffffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2darray_f32_f32, "fifffffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2darray_v4f32_f32, "V4fifffffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2darray_v4f16_f32, "V4eifffffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_3d_v4f32_f32, "V4fifffffffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_3d_v4f16_f32, "V4eifffffffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32, "V4fiffQtV4ibii", "nc", "extended-image-insts") #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 500aa85fe535..9e877b92eac6 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -328,7 +328,6 @@ let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorW } let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">; def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">; def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">; @@ -342,6 +341,8 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, " + "_Vector<4, float>, _Constant char)">; def ptestz128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">; def ptestc128 diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 2852c4a2916a..f246defc1fe8 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -62,49 +62,28 @@ struct FormatStyle { /// \version 3.3 int AccessModifierOffset; - /// Different styles for aligning after open brackets. - enum BracketAlignmentStyle : int8_t { - /// Align parameters on the open bracket, e.g.: - /// \code - /// someLongFunction(argument1, - /// argument2); - /// \endcode - BAS_Align, - /// Don't align, instead use ``ContinuationIndentWidth``, e.g.: - /// \code - /// someLongFunction(argument1, - /// argument2); - /// \endcode - BAS_DontAlign, - /// Always break after an open bracket, if the parameters don't fit - /// on a single line, e.g.: - /// \code - /// someLongFunction( - /// argument1, argument2); - /// \endcode - BAS_AlwaysBreak, - /// Always break after an open bracket, if the parameters don't fit - /// on a single line. Closing brackets will be placed on a new line. - /// E.g.: - /// \code - /// someLongFunction( - /// argument1, argument2 - /// ) - /// \endcode - /// - /// \note - /// This currently only applies to braced initializer lists (when - /// ``Cpp11BracedListStyle`` is not ``Block``) and parentheses. - /// \endnote - BAS_BlockIndent, - }; - /// If ``true``, horizontally aligns arguments after an open bracket. /// + /// \code + /// true: vs. false + /// someLongFunction(argument1, someLongFunction(argument1, + /// argument2); argument2); + /// \endcode + /// + /// \note + /// As of clang-format 22 this option is a bool with the previous + /// option of ``Align`` replaced with ``true``, ``DontAlign`` replaced + /// with ``false``, and the options of ``AlwaysBreak`` and ``BlockIndent`` + /// replaced with ``true`` and with setting of new style options using + /// ``BreakAfterOpenBracketBracedList``, ``BreakAfterOpenBracketFunction``, + /// ``BreakAfterOpenBracketIf``, ``BreakBeforeCloseBracketBracedList``, + /// ``BreakBeforeCloseBracketFunction``, and ``BreakBeforeCloseBracketIf``. + /// \endnote + /// /// This applies to round brackets (parentheses), angle brackets and square /// brackets. /// \version 3.8 - BracketAlignmentStyle AlignAfterOpenBracket; + bool AlignAfterOpenBracket; /// Different style for aligning array initializers. enum ArrayInitializerAlignmentStyle : int8_t { @@ -1708,6 +1687,57 @@ struct FormatStyle { /// \version 16 AttributeBreakingStyle BreakAfterAttributes; + /// Force break after the left bracket of a braced initializer list (when + /// ``Cpp11BracedListStyle`` is ``true``) when the list exceeds the column + /// limit. + /// \code + /// true: false: + /// vector<int> x { vs. vector<int> x {1, + /// 1, 2, 3} 2, 3} + /// \endcode + /// \version 22 + bool BreakAfterOpenBracketBracedList; + + /// Force break after the left parenthesis of a function (declaration, + /// definition, call) when the parameters exceed the column limit. + /// \code + /// true: false: + /// foo ( vs. foo (a, + /// a , b) b) + /// \endcode + /// \version 22 + bool BreakAfterOpenBracketFunction; + + /// Force break after the left parenthesis of an if control statement + /// when the expression exceeds the column limit. + /// \code + /// true: false: + /// if constexpr ( vs. if constexpr (a || + /// a || b) b) + /// \endcode + /// \version 22 + bool BreakAfterOpenBracketIf; + + /// Force break after the left parenthesis of a loop control statement + /// when the expression exceeds the column limit. + /// \code + /// true: false: + /// while ( vs. while (a && + /// a && b) { b) { + /// \endcode + /// \version 22 + bool BreakAfterOpenBracketLoop; + + /// Force break after the left parenthesis of a switch control statement + /// when the expression exceeds the column limit. + /// \code + /// true: false: + /// switch ( vs. switch (a + + /// a + b) { b) { + /// \endcode + /// \version 22 + bool BreakAfterOpenBracketSwitch; + /// The function declaration return type breaking style to use. /// \version 19 ReturnTypeBreakingStyle BreakAfterReturnType; @@ -2221,6 +2251,69 @@ struct FormatStyle { /// \version 3.7 BraceBreakingStyle BreakBeforeBraces; + /// Force break before the right bracket of a braced initializer list (when + /// ``Cpp11BracedListStyle`` is ``true``) when the list exceeds the column + /// limit. The break before the right bracket is only made if there is a + /// break after the opening bracket. + /// \code + /// true: false: + /// vector<int> x { vs. vector<int> x { + /// 1, 2, 3 1, 2, 3} + /// } + /// \endcode + /// \version 22 + bool BreakBeforeCloseBracketBracedList; + + /// Force break before the right parenthesis of a function (declaration, + /// definition, call) when the parameters exceed the column limit. + /// \code + /// true: false: + /// foo ( vs. foo ( + /// a , b a , b) + /// ) + /// \endcode + /// \version 22 + bool BreakBeforeCloseBracketFunction; + + /// Force break before the right parenthesis of an if control statement + /// when the expression exceeds the column limit. The break before the + /// closing parenthesis is only made if there is a break after the opening + /// parenthesis. + /// \code + /// true: false: + /// if constexpr ( vs. if constexpr ( + /// a || b a || b ) + /// ) + /// \endcode + /// \version 22 + bool BreakBeforeCloseBracketIf; + + /// Force break before the right parenthesis of a loop control statement + /// when the expression exceeds the column limit. The break before the + /// closing parenthesis is only made if there is a break after the opening + /// parenthesis. + /// \code + /// true: false: + /// while ( vs. while ( + /// a && b a && b) { + /// ) { + /// \endcode + /// \version 22 + bool BreakBeforeCloseBracketLoop; + + /// Force break before the right parenthesis of a switch control statement + /// when the expression exceeds the column limit. The break before the + /// closing parenthesis is only made if there is a break after the opening + /// parenthesis. + /// \code + /// true: false: + /// switch ( vs. switch ( + /// a + b a + b) { + /// ) { + /// \endcode + /// \version 22 + bool BreakBeforeCloseBracketSwitch; + /// Different ways to break before concept declarations. enum BreakBeforeConceptDeclarationsStyle : int8_t { /// Keep the template declaration line together with ``concept``. @@ -5530,10 +5623,23 @@ struct FormatStyle { BreakAdjacentStringLiterals == R.BreakAdjacentStringLiterals && BreakAfterAttributes == R.BreakAfterAttributes && BreakAfterJavaFieldAnnotations == R.BreakAfterJavaFieldAnnotations && + BreakAfterOpenBracketBracedList == + R.BreakAfterOpenBracketBracedList && + BreakAfterOpenBracketFunction == R.BreakAfterOpenBracketFunction && + BreakAfterOpenBracketIf == R.BreakAfterOpenBracketIf && + BreakAfterOpenBracketLoop == R.BreakAfterOpenBracketLoop && + BreakAfterOpenBracketSwitch == R.BreakAfterOpenBracketSwitch && BreakAfterReturnType == R.BreakAfterReturnType && BreakArrays == R.BreakArrays && BreakBeforeBinaryOperators == R.BreakBeforeBinaryOperators && BreakBeforeBraces == R.BreakBeforeBraces && + BreakBeforeCloseBracketBracedList == + R.BreakBeforeCloseBracketBracedList && + BreakBeforeCloseBracketFunction == + R.BreakBeforeCloseBracketFunction && + BreakBeforeCloseBracketIf == R.BreakBeforeCloseBracketIf && + BreakBeforeCloseBracketLoop == R.BreakBeforeCloseBracketLoop && + BreakBeforeCloseBracketSwitch == R.BreakBeforeCloseBracketSwitch && BreakBeforeConceptDeclarations == R.BreakBeforeConceptDeclarations && BreakBeforeInlineASMColon == R.BreakBeforeInlineASMColon && BreakBeforeTemplateCloser == R.BreakBeforeTemplateCloser && diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 687cd46773f4..2669f6245671 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -12403,6 +12403,11 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context, // Read the base type. switch (*Str++) { default: llvm_unreachable("Unknown builtin type letter!"); + case 'e': + assert(HowLong == 0 && !Signed && !Unsigned && + "Bad modifiers used with 'e'!"); + Type = Context.getLangOpts().OpenCL ? Context.HalfTy : Context.Float16Ty; + break; case 'x': assert(HowLong == 0 && !Signed && !Unsigned && "Bad modifiers used with 'x'!"); diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index b3ab82da5e01..8b57b963c538 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3411,7 +3411,7 @@ static bool interp__builtin_x86_byteshift( static bool interp__builtin_ia32_shuffle_generic( InterpState &S, CodePtr OpPC, const CallExpr *Call, - llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)> + llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)> GetSourceIndex) { assert(Call->getNumArgs() == 3); @@ -3428,8 +3428,19 @@ static bool interp__builtin_ia32_shuffle_generic( for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) { auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); - const Pointer &Src = (SrcVecIdx == 0) ? A : B; - TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); }); + + if (SrcIdx < 0) { + // Zero out this element + if (ElemT == PT_Float) { + Dst.elem<Floating>(DstIdx) = Floating( + S.getASTContext().getFloatTypeSemantics(VecT->getElementType())); + } else { + INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(DstIdx) = T::from(0); }); + } + } else { + const Pointer &Src = (SrcVecIdx == 0) ? A : B; + TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); }); + } } Dst.initializeAllElements(); @@ -4382,7 +4393,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0; unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index}; + return std::pair<unsigned, int>{SrcIdx, + static_cast<int>(LaneOffset + Index)}; }); case X86::BI__builtin_ia32_shufpd: case X86::BI__builtin_ia32_shufpd256: @@ -4400,7 +4412,27 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0; unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index}; + return std::pair<unsigned, int>{SrcIdx, + static_cast<int>(LaneOffset + Index)}; + }); + case X86::BI__builtin_ia32_insertps128: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned Mask) { + // Bits [3:0]: zero mask - if bit is set, zero this element + if ((Mask & (1 << DstIdx)) != 0) { + return std::pair<unsigned, int>{0, -1}; + } + // Bits [7:6]: select element from source vector Y (0-3) + // Bits [5:4]: select destination position (0-3) + unsigned SrcElem = (Mask >> 6) & 0x3; + unsigned DstElem = (Mask >> 4) & 0x3; + if (DstIdx == DstElem) { + // Insert element from source vector (B) at this position + return std::pair<unsigned, int>{1, static_cast<int>(SrcElem)}; + } else { + // Copy from destination vector (A) + return std::pair<unsigned, int>{0, static_cast<int>(DstIdx)}; + } }); case X86::BI__builtin_ia32_pshufb128: case X86::BI__builtin_ia32_pshufb256: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index d0404b957ab0..97eeba8b9d6c 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11621,7 +11621,7 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result, static bool evalShuffleGeneric( EvalInfo &Info, const CallExpr *Call, APValue &Out, - llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)> + llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)> GetSourceIndex) { const auto *VT = Call->getType()->getAs<VectorType>(); @@ -11644,8 +11644,16 @@ static bool evalShuffleGeneric( for (unsigned DstIdx = 0; DstIdx != NumElts; ++DstIdx) { auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); - const APValue &Src = (SrcVecIdx == 0) ? A : B; - ResultElements.push_back(Src.getVectorElt(SrcIdx)); + + if (SrcIdx < 0) { + // Zero out this element + QualType ElemTy = VT->getElementType(); + ResultElements.push_back( + APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy)))); + } else { + const APValue &Src = (SrcVecIdx == 0) ? A : B; + ResultElements.push_back(Src.getVectorElt(SrcIdx)); + } } Out = APValue(ResultElements.data(), ResultElements.size()); @@ -12438,7 +12446,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (!evalShuffleGeneric( Info, E, R, [](unsigned DstIdx, - unsigned ShuffleMask) -> std::pair<unsigned, unsigned> { + unsigned ShuffleMask) -> std::pair<unsigned, int> { constexpr unsigned LaneBits = 128u; unsigned NumElemPerLane = LaneBits / 32; unsigned NumSelectableElems = NumElemPerLane / 2; @@ -12451,7 +12459,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 0 : 1; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return {SrcIdx, LaneOffset + Index}; + return {SrcIdx, static_cast<int>(LaneOffset + Index)}; })) return false; return Success(R, E); @@ -12463,7 +12471,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (!evalShuffleGeneric( Info, E, R, [](unsigned DstIdx, - unsigned ShuffleMask) -> std::pair<unsigned, unsigned> { + unsigned ShuffleMask) -> std::pair<unsigned, int> { constexpr unsigned LaneBits = 128u; unsigned NumElemPerLane = LaneBits / 64; unsigned NumSelectableElems = NumElemPerLane / 2; @@ -12476,7 +12484,31 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 0 : 1; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return {SrcIdx, LaneOffset + Index}; + return {SrcIdx, static_cast<int>(LaneOffset + Index)}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_insertps128: { + APValue R; + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> { + // Bits [3:0]: zero mask - if bit is set, zero this element + if ((Mask & (1 << DstIdx)) != 0) { + return {0, -1}; + } + // Bits [7:6]: select element from source vector Y (0-3) + // Bits [5:4]: select destination position (0-3) + unsigned SrcElem = (Mask >> 6) & 0x3; + unsigned DstElem = (Mask >> 4) & 0x3; + if (DstIdx == DstElem) { + // Insert element from source vector (B) at this position + return {1, static_cast<int>(SrcElem)}; + } else { + // Copy from destination vector (A) + return {0, static_cast<int>(DstIdx)}; + } })) return false; return Success(R, E); diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index f49a5af2c958..9eab70955b6b 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -647,8 +647,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ballot_w64: { llvm::Type *ResultType = ConvertType(E->getType()); llvm::Value *Src = EmitScalarExpr(E->getArg(0)); - Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType }); - return Builder.CreateCall(F, { Src }); + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); + return Builder.CreateCall(F, {Src}); } case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32: case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: { @@ -1139,6 +1139,83 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: return emitAMDGCNImageOverloadedReturnType( *this, E, Intrinsic::amdgcn_image_sample_cube, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_lz_1d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_l_1d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_d_1d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_lz_2d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_l_2d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_d_2d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_lz_3d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_l_3d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_d_3d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_lz_cube, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_l_cube, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_lz_1darray, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_l_1darray, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_d_1darray, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_lz_2darray, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_l_2darray, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_d_2darray, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_gather4_lz_2d, false); case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4: case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: { llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8); diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index e5abf833194d..9ab024a03fbd 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -356,9 +356,11 @@ bool ContinuationIndenter::canBreak(const LineState &State) { return CurrentState.BreakBeforeClosingBrace; } - // Allow breaking before the right parens with block indentation if there was - // a break after the left parens, which is tracked by BreakBeforeClosingParen. - if (Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent && + // Check need to break before the right parens if there was a break after + // the left parens, which is tracked by BreakBeforeClosingParen. + if ((Style.BreakBeforeCloseBracketFunction || + Style.BreakBeforeCloseBracketIf || Style.BreakBeforeCloseBracketLoop || + Style.BreakBeforeCloseBracketSwitch) && Current.is(tok::r_paren)) { return CurrentState.BreakBeforeClosingParen; } @@ -837,32 +839,38 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, return Tok.is(tok::l_brace) && Tok.isNot(BK_Block) && Style.Cpp11BracedListStyle != FormatStyle::BLS_Block; }; - if (Tok.isNoneOf(tok::l_paren, TT_TemplateOpener, tok::l_square) && - !IsStartOfBracedList()) { + if (IsStartOfBracedList()) + return Style.BreakAfterOpenBracketBracedList; + if (Tok.isNoneOf(tok::l_paren, TT_TemplateOpener, tok::l_square)) return false; - } if (!Tok.Previous) return true; if (Tok.Previous->isIf()) - return Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak; - return Tok.Previous->isNoneOf(TT_CastRParen, tok::kw_for, tok::kw_while, - tok::kw_switch) && - !(Style.isJavaScript() && Tok.Previous->is(Keywords.kw_await)); + return Style.BreakAfterOpenBracketIf; + if (Tok.Previous->isLoop(Style)) + return Style.BreakAfterOpenBracketLoop; + if (Tok.Previous->is(tok::kw_switch)) + return Style.BreakAfterOpenBracketSwitch; + if (Style.BreakAfterOpenBracketFunction) { + return !Tok.Previous->is(TT_CastRParen) && + !(Style.isJavaScript() && Tok.is(Keywords.kw_await)); + } + return false; }; auto IsFunctionCallParen = [](const FormatToken &Tok) { return Tok.is(tok::l_paren) && Tok.ParameterCount > 0 && Tok.Previous && Tok.Previous->is(tok::identifier); }; - auto IsInTemplateString = [this](const FormatToken &Tok) { + auto IsInTemplateString = [this](const FormatToken &Tok, bool NestBlocks) { if (!Style.isJavaScript()) return false; for (const auto *Prev = &Tok; Prev; Prev = Prev->Previous) { if (Prev->is(TT_TemplateString) && Prev->opensScope()) return true; - if (Prev->opensScope() || - (Prev->is(TT_TemplateString) && Prev->closesScope())) { - break; - } + if (Prev->opensScope() && !NestBlocks) + return false; + if (Prev->is(TT_TemplateString) && Prev->closesScope()) + return false; } return false; }; @@ -884,21 +892,25 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, Tok.isOneOf(tok::ellipsis, Keywords.kw_await))) { return true; } - if (const auto *Previous = Tok.Previous; - !Previous || (Previous->isNoneOf(TT_FunctionDeclarationLParen, - TT_LambdaDefinitionLParen) && - !IsFunctionCallParen(*Previous))) { + const auto *Previous = TokAfterLParen.Previous; + assert(Previous); // IsOpeningBracket(Previous) + if (Previous->Previous && + (Previous->Previous->isIf() || Previous->Previous->isLoop(Style) || + Previous->Previous->is(tok::kw_switch))) { + return false; + } + if (Previous->isNoneOf(TT_FunctionDeclarationLParen, + TT_LambdaDefinitionLParen) && + !IsFunctionCallParen(*Previous)) { return true; } - if (IsOpeningBracket(Tok) || IsInTemplateString(Tok)) + if (IsOpeningBracket(Tok) || IsInTemplateString(Tok, true)) return true; const auto *Next = Tok.Next; return !Next || Next->isMemberAccess() || Next->is(TT_FunctionDeclarationLParen) || IsFunctionCallParen(*Next); }; - if ((Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak || - Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent) && - IsOpeningBracket(Previous) && State.Column > getNewLineColumn(State) && + if (IsOpeningBracket(Previous) && State.Column > getNewLineColumn(State) && // Don't do this for simple (no expressions) one-argument function calls // as that feels like needlessly wasting whitespace, e.g.: // @@ -920,7 +932,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, // Note: This doesn't apply to macro expansion lines, which are MACRO( , , ) // with args as children of the '(' and ',' tokens. It does not make sense to // align the commas with the opening paren. - if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign && + if (Style.AlignAfterOpenBracket && !CurrentState.IsCSharpGenericTypeConstraint && Previous.opensScope() && Previous.isNoneOf(TT_ObjCMethodExpr, TT_RequiresClause, TT_TableGenDAGArgOpener, @@ -933,7 +945,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, Previous.Previous->isNoneOf(tok::identifier, tok::l_paren, BK_BracedInit))) || Previous.is(TT_VerilogMultiLineListLParen)) && - !IsInTemplateString(Current)) { + !IsInTemplateString(Current, false)) { CurrentState.Indent = State.Column + Spaces; CurrentState.IsAligned = true; } @@ -1271,8 +1283,20 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, } if (PreviousNonComment && PreviousNonComment->is(tok::l_paren)) { - CurrentState.BreakBeforeClosingParen = - Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent; + if (auto Previous = PreviousNonComment->Previous) { + if (Previous->isIf()) { + CurrentState.BreakBeforeClosingParen = Style.BreakBeforeCloseBracketIf; + } else if (Previous->isLoop(Style)) { + CurrentState.BreakBeforeClosingParen = + Style.BreakBeforeCloseBracketLoop; + } else if (Previous->is(tok::kw_switch)) { + CurrentState.BreakBeforeClosingParen = + Style.BreakBeforeCloseBracketSwitch; + } else { + CurrentState.BreakBeforeClosingParen = + Style.BreakBeforeCloseBracketFunction; + } + } } if (PreviousNonComment && PreviousNonComment->is(TT_TemplateOpener)) @@ -1416,13 +1440,17 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { State.Stack.size() > 1) { return State.Stack[State.Stack.size() - 2].LastSpace; } - if (Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent && - (Current.is(tok::r_paren) || - (Current.is(tok::r_brace) && Current.MatchingParen && - Current.MatchingParen->is(BK_BracedInit))) && + if (Style.BreakBeforeCloseBracketBracedList && Current.is(tok::r_brace) && + Current.MatchingParen && Current.MatchingParen->is(BK_BracedInit) && State.Stack.size() > 1) { return State.Stack[State.Stack.size() - 2].LastSpace; } + if ((Style.BreakBeforeCloseBracketFunction || + Style.BreakBeforeCloseBracketIf || Style.BreakBeforeCloseBracketLoop || + Style.BreakBeforeCloseBracketSwitch) && + Current.is(tok::r_paren) && State.Stack.size() > 1) { + return State.Stack[State.Stack.size() - 2].LastSpace; + } if (Style.BreakBeforeTemplateCloser && Current.is(TT_TemplateCloser) && State.Stack.size() > 1) { return State.Stack[State.Stack.size() - 2].LastSpace; @@ -1844,8 +1872,8 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, PrecedenceLevel < prec::Assignment) && (!Previous || Previous->isNot(tok::kw_return) || (!Style.isJava() && PrecedenceLevel > 0)) && - (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign || - PrecedenceLevel > prec::Comma || Current.NestingLevel == 0) && + (Style.AlignAfterOpenBracket || PrecedenceLevel > prec::Comma || + Current.NestingLevel == 0) && (!Style.isTableGen() || (Previous && Previous->isOneOf(TT_TableGenDAGArgListComma, TT_TableGenDAGArgListCommaToBreak)))) { @@ -1885,8 +1913,7 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, if (PrecedenceLevel > prec::Unknown) NewParenState.LastSpace = std::max(NewParenState.LastSpace, State.Column); if (PrecedenceLevel != prec::Conditional && - Current.isNot(TT_UnaryOperator) && - Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) { + Current.isNot(TT_UnaryOperator) && Style.AlignAfterOpenBracket) { NewParenState.StartOfFunctionCall = State.Column; } diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index edd126c7724b..dd14fcd72922 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -32,6 +32,13 @@ using clang::format::FormatStyle; LLVM_YAML_IS_SEQUENCE_VECTOR(FormatStyle::RawStringFormat) +enum BracketAlignmentStyle : int8_t { + BAS_Align, + BAS_DontAlign, + BAS_AlwaysBreak, + BAS_BlockIndent +}; + namespace llvm { namespace yaml { template <> @@ -204,16 +211,16 @@ template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> { } }; -template <> struct ScalarEnumerationTraits<FormatStyle::BracketAlignmentStyle> { - static void enumeration(IO &IO, FormatStyle::BracketAlignmentStyle &Value) { - IO.enumCase(Value, "Align", FormatStyle::BAS_Align); - IO.enumCase(Value, "DontAlign", FormatStyle::BAS_DontAlign); - IO.enumCase(Value, "AlwaysBreak", FormatStyle::BAS_AlwaysBreak); - IO.enumCase(Value, "BlockIndent", FormatStyle::BAS_BlockIndent); +template <> struct ScalarEnumerationTraits<BracketAlignmentStyle> { + static void enumeration(IO &IO, BracketAlignmentStyle &Value) { + IO.enumCase(Value, "Align", BAS_Align); + IO.enumCase(Value, "DontAlign", BAS_DontAlign); // For backward compatibility. - IO.enumCase(Value, "true", FormatStyle::BAS_Align); - IO.enumCase(Value, "false", FormatStyle::BAS_DontAlign); + IO.enumCase(Value, "true", BAS_Align); + IO.enumCase(Value, "false", BAS_DontAlign); + IO.enumCase(Value, "AlwaysBreak", BAS_AlwaysBreak); + IO.enumCase(Value, "BlockIndent", BAS_BlockIndent); } }; @@ -979,6 +986,54 @@ template <> struct MappingTraits<FormatStyle> { bool SpacesInCStyleCastParentheses = false; bool SpacesInParentheses = false; + if (IO.outputting()) { + IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket); + } else { + // For backward compatibility. + BracketAlignmentStyle LocalBAS = BAS_Align; + if (IsGoogleOrChromium) { + FormatStyle::LanguageKind Language = Style.Language; + if (Language == FormatStyle::LK_None) + Language = ((FormatStyle *)IO.getContext())->Language; + if (Language == FormatStyle::LK_JavaScript) + LocalBAS = BAS_AlwaysBreak; + else if (Language == FormatStyle::LK_Java) + LocalBAS = BAS_DontAlign; + } else if (BasedOnStyle.equals_insensitive("webkit")) { + LocalBAS = BAS_DontAlign; + } + IO.mapOptional("AlignAfterOpenBracket", LocalBAS); + Style.BreakAfterOpenBracketBracedList = false; + Style.BreakAfterOpenBracketFunction = false; + Style.BreakAfterOpenBracketIf = false; + Style.BreakAfterOpenBracketLoop = false; + Style.BreakAfterOpenBracketSwitch = false; + Style.BreakBeforeCloseBracketBracedList = false; + Style.BreakBeforeCloseBracketFunction = false; + Style.BreakBeforeCloseBracketIf = false; + Style.BreakBeforeCloseBracketLoop = false; + Style.BreakBeforeCloseBracketSwitch = false; + + switch (LocalBAS) { + case BAS_DontAlign: + Style.AlignAfterOpenBracket = false; + break; + case BAS_BlockIndent: + Style.BreakBeforeCloseBracketBracedList = true; + Style.BreakBeforeCloseBracketFunction = true; + Style.BreakBeforeCloseBracketIf = true; + [[fallthrough]]; + case BAS_AlwaysBreak: + Style.BreakAfterOpenBracketBracedList = true; + Style.BreakAfterOpenBracketFunction = true; + Style.BreakAfterOpenBracketIf = true; + [[fallthrough]]; + case BAS_Align: + Style.AlignAfterOpenBracket = true; + break; + } + } + // For backward compatibility. if (!IO.outputting()) { IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlines); @@ -1014,7 +1069,6 @@ template <> struct MappingTraits<FormatStyle> { } IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset); - IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket); IO.mapOptional("AlignArrayOfStructures", Style.AlignArrayOfStructures); IO.mapOptional("AlignConsecutiveAssignments", Style.AlignConsecutiveAssignments); @@ -1079,10 +1133,29 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("BreakAfterAttributes", Style.BreakAfterAttributes); IO.mapOptional("BreakAfterJavaFieldAnnotations", Style.BreakAfterJavaFieldAnnotations); + IO.mapOptional("BreakAfterOpenBracketBracedList", + Style.BreakAfterOpenBracketBracedList); + IO.mapOptional("BreakAfterOpenBracketFunction", + Style.BreakAfterOpenBracketFunction); + IO.mapOptional("BreakAfterOpenBracketIf", Style.BreakAfterOpenBracketIf); + IO.mapOptional("BreakAfterOpenBracketLoop", + Style.BreakAfterOpenBracketLoop); + IO.mapOptional("BreakAfterOpenBracketSwitch", + Style.BreakAfterOpenBracketSwitch); IO.mapOptional("BreakAfterReturnType", Style.BreakAfterReturnType); IO.mapOptional("BreakArrays", Style.BreakArrays); IO.mapOptional("BreakBeforeBinaryOperators", Style.BreakBeforeBinaryOperators); + IO.mapOptional("BreakBeforeCloseBracketBracedList", + Style.BreakBeforeCloseBracketBracedList); + IO.mapOptional("BreakBeforeCloseBracketFunction", + Style.BreakBeforeCloseBracketFunction); + IO.mapOptional("BreakBeforeCloseBracketIf", + Style.BreakBeforeCloseBracketIf); + IO.mapOptional("BreakBeforeCloseBracketLoop", + Style.BreakBeforeCloseBracketLoop); + IO.mapOptional("BreakBeforeCloseBracketSwitch", + Style.BreakBeforeCloseBracketSwitch); IO.mapOptional("BreakBeforeConceptDeclarations", Style.BreakBeforeConceptDeclarations); IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); @@ -1561,7 +1634,7 @@ static void expandPresetsSpacesInParens(FormatStyle &Expanded) { FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { FormatStyle LLVMStyle; LLVMStyle.AccessModifierOffset = -2; - LLVMStyle.AlignAfterOpenBracket = FormatStyle::BAS_Align; + LLVMStyle.AlignAfterOpenBracket = true; LLVMStyle.AlignArrayOfStructures = FormatStyle::AIAS_None; LLVMStyle.AlignConsecutiveAssignments = {}; LLVMStyle.AlignConsecutiveAssignments.PadOperators = true; @@ -1621,10 +1694,20 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.BreakAdjacentStringLiterals = true; LLVMStyle.BreakAfterAttributes = FormatStyle::ABS_Leave; LLVMStyle.BreakAfterJavaFieldAnnotations = false; + LLVMStyle.BreakAfterOpenBracketBracedList = false; + LLVMStyle.BreakAfterOpenBracketFunction = false; + LLVMStyle.BreakAfterOpenBracketIf = false; + LLVMStyle.BreakAfterOpenBracketLoop = false; + LLVMStyle.BreakAfterOpenBracketSwitch = false; LLVMStyle.BreakAfterReturnType = FormatStyle::RTBS_None; LLVMStyle.BreakArrays = true; LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None; LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; + LLVMStyle.BreakBeforeCloseBracketBracedList = false; + LLVMStyle.BreakBeforeCloseBracketFunction = false; + LLVMStyle.BreakBeforeCloseBracketIf = false; + LLVMStyle.BreakBeforeCloseBracketLoop = false; + LLVMStyle.BreakBeforeCloseBracketSwitch = false; LLVMStyle.BreakBeforeConceptDeclarations = FormatStyle::BBCDS_Always; LLVMStyle.BreakBeforeInlineASMColon = FormatStyle::BBIAS_OnlyMultiline; LLVMStyle.BreakBeforeTemplateCloser = false; @@ -1877,7 +1960,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; if (Language == FormatStyle::LK_Java) { - GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + GoogleStyle.AlignAfterOpenBracket = false; GoogleStyle.AlignOperands = FormatStyle::OAS_DontAlign; GoogleStyle.AlignTrailingComments = {}; GoogleStyle.AlignTrailingComments.Kind = FormatStyle::TCAS_Never; @@ -1889,7 +1972,9 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { GoogleStyle.SpaceAfterCStyleCast = true; GoogleStyle.SpacesBeforeTrailingComments = 1; } else if (Language == FormatStyle::LK_JavaScript) { - GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + GoogleStyle.BreakAfterOpenBracketBracedList = true; + GoogleStyle.BreakAfterOpenBracketFunction = true; + GoogleStyle.BreakAfterOpenBracketIf = true; GoogleStyle.AlignOperands = FormatStyle::OAS_DontAlign; GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; // TODO: still under discussion whether to switch to SLS_All. @@ -2026,7 +2111,7 @@ FormatStyle getMozillaStyle() { FormatStyle getWebKitStyle() { FormatStyle Style = getLLVMStyle(); Style.AccessModifierOffset = -4; - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + Style.AlignAfterOpenBracket = false; Style.AlignOperands = FormatStyle::OAS_DontAlign; Style.AlignTrailingComments = {}; Style.AlignTrailingComments.Kind = FormatStyle::TCAS_Never; diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index d1c62642efd4..28fdbcbf0e47 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -68,7 +68,7 @@ bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const { assert(MatchingParen); assert(MatchingParen->is(tok::l_brace)); if (Style.Cpp11BracedListStyle == FormatStyle::BLS_Block || - Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent) { + !Style.BreakBeforeCloseBracketBracedList) { return false; } const auto *LBrace = MatchingParen; @@ -198,7 +198,7 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { return; // Column format doesn't really make sense if we don't align after brackets. - if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign) + if (!Style.AlignAfterOpenBracket) return; FormatToken *ItemBegin = Token->Next; diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index 6f3d24aefc1c..d833130a538f 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -666,6 +666,12 @@ public: (endsSequence(tok::identifier, tok::kw_if) && AllowConstexprMacro); } + bool isLoop(const FormatStyle &Style) const { + return isOneOf(tok::kw_for, tok::kw_while) || + (Style.isJavaScript() && isNot(tok::l_paren) && Previous && + Previous->is(tok::kw_for)); + } + bool closesScopeAfterBlock() const { if (getBlockKind() == BK_Block) return true; diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 021d8c658eb1..8e227da2a79a 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -4427,10 +4427,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.is(tok::l_paren) && Style.PenaltyBreakOpenParenthesis != 0) return Style.PenaltyBreakOpenParenthesis; - if (Left.is(tok::l_paren) && InFunctionDecl && - Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) { + if (Left.is(tok::l_paren) && InFunctionDecl && Style.AlignAfterOpenBracket) return 100; - } if (Left.is(tok::l_paren) && Left.Previous && (Left.Previous->isOneOf(tok::kw_for, tok::kw__Generic) || Left.Previous->isIf())) { @@ -4446,7 +4444,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, // If we aren't aligning after opening parens/braces we can always break // here unless the style does not want us to place all arguments on the // next line. - if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign && + if (!Style.AlignAfterOpenBracket && (Left.ParameterCount <= 1 || Style.AllowAllArgumentsOnNextLine)) { return 0; } @@ -6226,24 +6224,31 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, (Right.isBlockIndentedInitRBrace(Style))); } - // We only break before r_paren if we're in a block indented context. + // We can break before r_paren if we're in a block indented context or + // a control statement with an explicit style option. if (Right.is(tok::r_paren)) { - if (Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent || - !Right.MatchingParen) { + if (!Right.MatchingParen) return false; - } auto Next = Right.Next; if (Next && Next->is(tok::r_paren)) Next = Next->Next; if (Next && Next->is(tok::l_paren)) return false; const FormatToken *Previous = Right.MatchingParen->Previous; - return !(Previous && (Previous->is(tok::kw_for) || Previous->isIf())); + if (!Previous) + return false; + if (Previous->isIf()) + return Style.BreakBeforeCloseBracketIf; + if (Previous->isLoop(Style)) + return Style.BreakBeforeCloseBracketLoop; + if (Previous->is(tok::kw_switch)) + return Style.BreakBeforeCloseBracketSwitch; + return Style.BreakBeforeCloseBracketFunction; } if (Left.isOneOf(tok::r_paren, TT_TrailingAnnotation) && Right.is(TT_TrailingAnnotation) && - Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent) { + Style.BreakBeforeCloseBracketFunction) { return false; } diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index 65c324c10ca5..f05c28fd7a12 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -221,7 +221,7 @@ std::string HeaderSearch::getPrebuiltModuleFileName(StringRef ModuleName, // file. for (const std::string &Dir : HSOpts.PrebuiltModulePaths) { SmallString<256> Result(Dir); - llvm::sys::fs::make_absolute(Result); + FileMgr.makeAbsolutePath(Result); if (ModuleName.contains(':')) // The separator of C++20 modules partitions (':') is not good for file // systems, here clang and gcc choose '-' by default since it is not a @@ -246,7 +246,7 @@ std::string HeaderSearch::getPrebuiltImplicitModuleFileName(Module *Module) { StringRef ModuleCacheHash = HSOpts.DisableModuleHash ? "" : getModuleHash(); for (const std::string &Dir : HSOpts.PrebuiltModulePaths) { SmallString<256> CachePath(Dir); - llvm::sys::fs::make_absolute(CachePath); + FileMgr.makeAbsolutePath(CachePath); llvm::sys::path::append(CachePath, ModuleCacheHash); std::string FileName = getCachedModuleFileNameImpl(ModuleName, ModuleMapPath, CachePath); diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index e32f4376a5eb..139c4abc040d 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -153,7 +153,48 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32: - case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: { + case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32: { StringRef FeatureList( getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID)); if (!Builtin::evaluateRequiredTargetFeatures(FeatureList, diff --git a/clang/lib/Tooling/Transformer/RangeSelector.cpp b/clang/lib/Tooling/Transformer/RangeSelector.cpp index 171c786bc366..b4bdec1fcdd6 100644 --- a/clang/lib/Tooling/Transformer/RangeSelector.cpp +++ b/clang/lib/Tooling/Transformer/RangeSelector.cpp @@ -205,8 +205,12 @@ RangeSelector transformer::name(std::string ID) { // `foo<int>` for which this range will be too short. Doing so will // require subcasing `NamedDecl`, because it doesn't provide virtual // access to the \c DeclarationNameInfo. - if (tooling::getText(R, *Result.Context) != D->getName()) - return CharSourceRange(); + StringRef Text = tooling::getText(R, *Result.Context); + if (Text != D->getName()) + return llvm::make_error<StringError>( + llvm::errc::not_supported, + "range selected by name(node id=" + ID + "): '" + Text + + "' is different from decl name '" + D->getName() + "'"); return R; } if (const auto *E = Node.get<DeclRefExpr>()) { diff --git a/clang/test/CXX/drs/cwg0xx.cpp b/clang/test/CXX/drs/cwg0xx.cpp index 805be67f2dc1..10a4f1d6add3 100644 --- a/clang/test/CXX/drs/cwg0xx.cpp +++ b/clang/test/CXX/drs/cwg0xx.cpp @@ -90,6 +90,8 @@ namespace cwg5 { // cwg5: 3.1 const C c = e; } // namespace cwg5 +// cwg6 is in cwg6.cpp + namespace cwg7 { // cwg7: 3.4 class A { public: ~A(); }; class B : virtual private A {}; // #cwg7-B diff --git a/clang/test/CXX/drs/cwg28xx.cpp b/clang/test/CXX/drs/cwg28xx.cpp index a6b2b99e0c3f..d0ee191ef23d 100644 --- a/clang/test/CXX/drs/cwg28xx.cpp +++ b/clang/test/CXX/drs/cwg28xx.cpp @@ -61,6 +61,24 @@ namespace cwg2819 { // cwg2819: 19 c++26 #endif } // namespace cwg2819 +namespace cwg2823 { // cwg2823: no +#if __cplusplus >= 201103L + constexpr int *p = 0; + constexpr int *q1 = &*p; + // expected-error@-1 {{constexpr variable 'q1' must be initialized by a constant expression}} + // expected-note@-2 {{dereferencing a null pointer is not allowed in a constant expression}} + // FIXME: invalid: dereferencing a null pointer. + constexpr int *q2 = &p[0]; + + int arr[32]; + constexpr int *r = arr; + // FIXME: invalid: dereferencing a past-the-end pointer. + constexpr int *s1 = &*(r + 32); + // FIXME: invalid: dereferencing a past-the-end pointer. + constexpr int *s2 = &r[32]; +#endif +} + namespace cwg2847 { // cwg2847: 19 review 2024-03-01 #if __cplusplus >= 202002L diff --git a/clang/test/CXX/drs/cwg2xx.cpp b/clang/test/CXX/drs/cwg2xx.cpp index 37186e3c3f20..a4995ddc2c58 100644 --- a/clang/test/CXX/drs/cwg2xx.cpp +++ b/clang/test/CXX/drs/cwg2xx.cpp @@ -230,6 +230,38 @@ namespace cwg211 { // cwg211: 2.7 }; } // namespace cwg211 +namespace cwg212 { // cwg212: 2.7 + template<typename T> struct Base; + template<typename T> struct Derived; + + int *overload(void*); + float *overload(Base<int>*); + double *overload(Base<long>*); + + void f(Derived<int> *p) { + // OK, calls void* overload. + int *a = overload(p); + + Base<int> *q = p; + // expected-error@-1 {{cannot initialize a variable of type 'Base<int> *' with an lvalue of type 'Derived<int> *'}} + } + + template<typename T> struct Base {}; + template<typename T> struct Derived : Base<T> {}; + + void g(Derived<long> *p) { + // OK, instantiates and calls Base<long>* overlod. + double *b = overload(p); + (void)b; + } + + void h(Derived<float> *p) { + // OK, instantiates and converts. + Base<float> *q = p; + (void)q; + } +} + namespace cwg213 { // cwg213: 2.7 template <class T> struct A : T { void h(T t) { @@ -593,6 +625,9 @@ namespace cwg231 { // cwg231: 2.7 } } // namespace cwg231 +// 232 is NAD; the desired behavior is described in 2823. +// cwg232: dup 2823 + // cwg234: na // cwg235: na diff --git a/clang/test/CXX/drs/cwg6.cpp b/clang/test/CXX/drs/cwg6.cpp new file mode 100644 index 000000000000..4752e72034c7 --- /dev/null +++ b/clang/test/CXX/drs/cwg6.cpp @@ -0,0 +1,51 @@ +// RUN: %clang_cc1 -std=c++98 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++11 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++14 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++17 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++20 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++23 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++2c %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | FileCheck %s --check-prefixes CHECK + +#if __cplusplus == 199711L +#define static_assert(expr) __extension__ _Static_assert(expr) +#define noexcept throw() +#endif + +namespace cwg6 { // cwg6: 2.7 +#if __cplusplus >= 201103L +struct Counter { + int copies; + constexpr Counter(int copies) : copies(copies) {} + constexpr Counter(const Counter& other) : copies(other.copies + 1) {} +}; + +// Passing an lvalue by value makes a non-elidable copy. +constexpr int PassByValue(Counter c) { return c.copies; } +constexpr int PassByValue2(Counter c) { return PassByValue(c); } +constexpr int PassByValue3(Counter c) { return PassByValue2(c); } +static_assert(PassByValue(Counter(0)) == 0, "expect no copies"); +static_assert(PassByValue2(Counter(0)) == 1, "expect 1 copy"); +static_assert(PassByValue3(Counter(0)) == 2, "expect 2 copies"); +#endif + +struct A { + A() noexcept; + A(const A&) noexcept; + ~A() noexcept; +}; + +inline void f(A a) noexcept {} + +// CHECK-LABEL: define {{.*}} @_ZN4cwg64callEv +void call() { + A a; + // We copy the parameter here, even though object is not mutated by f and + // otherwise satisfies the criteria for the proposed CWG6 optimization. + // CHECK: call {{.*}} @_ZN4cwg61AC1ERKS0_( + // CHECK: call {{.*}} @_ZN4cwg61fENS_1AE( + f(a); + // CHECK: call {{.*}} @_ZN4cwg61AD1Ev( + // CHECK: call {{.*}} @_ZN4cwg61AD1Ev( +} + +} // namespace cwg6 diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index 62cd392824bb..35fa65a99836 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -307,6 +307,16 @@ __m128 test_mm_insert_ps(__m128 x, __m128 y) { return _mm_insert_ps(x, y, 4); } +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x10), 1.0f, 10.0f, 3.0f, 4.0f))); // Insert Y[0] into X[1] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x00), 10.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[0] into X[0] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x20), 1.0f, 2.0f, 10.0f, 4.0f))); // Insert Y[0] into X[2] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x30), 1.0f, 2.0f, 3.0f, 10.0f))); // Insert Y[0] into X[3] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x80), 30.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[2] into X[0] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x01), 0.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[0] into X[0], zero X[0] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x0A), 10.0f, 0.0f, 3.0f, 0.0f))); // Insert Y[0] into X[0], zero X[1] and X[3] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x0F), 0.0f, 0.0f, 0.0f, 0.0f))); // Insert Y[0] into X[0], zero all +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0xCF), 0.0f, 0.0f, 0.0f, 0.0f))); // Insert Y[3] into X[0], zero all + __m128i test_mm_max_epi8(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_max_epi8 // CHECK: call <16 x i8> @llvm.smax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) diff --git a/clang/test/CodeGen/builtins-extended-image.c b/clang/test/CodeGen/builtins-extended-image.c new file mode 100644 index 000000000000..0dbf81dabd77 --- /dev/null +++ b/clang/test/CodeGen/builtins-extended-image.c @@ -0,0 +1,1528 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 -target-feature +extended-image-insts %s -emit-llvm -o - | FileCheck %s + +typedef int int4 __attribute__((ext_vector_type(4))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef _Float16 half4 __attribute__((ext_vector_type(4))); + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_r( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_r(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(1, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_g( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 2, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_g(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(2, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_b( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 4, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_b(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(4, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_a( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 8, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_a(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(8, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_1d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32 +// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP3]] +// +float4 test_amdgcn_image_sample_lz_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1d_v4f32_f32(100, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_1d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_sample_l_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_1d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_d_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_2d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_sample_lz_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_2d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32.v8i32.v4i32(i32 10, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_l_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2d_v4f32_f32(10, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_2d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP6]], align 32 +// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP7]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP8]] +// +float4 test_amdgcn_image_sample_d_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2d_v4f32_f32(100, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_3d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.3d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_lz_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_3d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_3d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.3d.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP6]] +// +float4 test_amdgcn_image_sample_l_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_3d_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_3d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP9]], align 32 +// CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP11:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], float [[TMP7]], float [[TMP8]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP10]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP11]] +// +float4 test_amdgcn_image_sample_d_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_3d_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_cube_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.cube.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_lz_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_cube_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_cube_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.cube.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP6]] +// +float4 test_amdgcn_image_sample_l_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_cube_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_1darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_sample_lz_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1darray_v4f32_f32(1, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_1darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_l_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_1darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1darray.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP6]] +// +float4 test_amdgcn_image_sample_d_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_2darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_lz_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_2darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP6]] +// +float4 test_amdgcn_image_sample_l_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_2darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP7]], align 32 +// CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2darray.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP8]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP9]] +// +float4 test_amdgcn_image_sample_d_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2darray_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_1d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32 +// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.1d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP3]] +// +half4 test_amdgcn_image_sample_lz_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1d_v4f16_f32(100, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_1d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.1d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_amdgcn_image_sample_l_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_1d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.1d.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_d_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_2d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_amdgcn_image_sample_lz_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_2d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.2d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_l_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_2d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP6]], align 32 +// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP8:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.2d.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP7]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP8]] +// +half4 test_amdgcn_image_sample_d_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_3d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.3d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_lz_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_3d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_3d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.3d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP6]] +// +half4 test_amdgcn_image_sample_l_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_3d_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_3d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP9]], align 32 +// CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP11:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.3d.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], float [[TMP7]], float [[TMP8]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP10]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP11]] +// +half4 test_amdgcn_image_sample_d_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_3d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_cube_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.cube.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_lz_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_cube_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_cube_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.cube.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP6]] +// +half4 test_amdgcn_image_sample_l_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_cube_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_1darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.1darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_amdgcn_image_sample_lz_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1darray_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_1darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.1darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_l_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_1darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP6]] +// +half4 test_amdgcn_image_sample_d_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_2darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_lz_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_2darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.2darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP6]] +// +half4 test_amdgcn_image_sample_l_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_2darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP7]], align 32 +// CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.2darray.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP8]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP9]] +// +half4 test_amdgcn_image_sample_d_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2darray_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_lz_2d_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP4]] +// +float test_amdgcn_image_sample_lz_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2d_f32_f32(1, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_l_2d_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.l.2d.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP5]] +// +float test_amdgcn_image_sample_l_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2d_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_d_2d_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP6]], align 32 +// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.image.sample.d.2d.f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP7]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP8]] +// +float test_amdgcn_image_sample_d_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2d_f32_f32(1, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_lz_2darray_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.lz.2darray.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP5]] +// +float test_amdgcn_image_sample_lz_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2darray_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_l_2darray_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP6]] +// +float test_amdgcn_image_sample_l_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2darray_f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_d_2darray_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP7]], align 32 +// CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.image.sample.d.2darray.f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP8]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP9]] +// +float test_amdgcn_image_sample_d_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2darray_f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} diff --git a/clang/test/SemaOpenCL/builtins-extended-image-param-gfx1100-err.cl b/clang/test/SemaOpenCL/builtins-extended-image-param-gfx1100-err.cl new file mode 100644 index 000000000000..47dbdd4e5178 --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-extended-image-param-gfx1100-err.cl @@ -0,0 +1,227 @@ +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 -target-feature +extended-image-insts -S -verify=expected -o - %s +// REQUIRES: amdgpu-registered-target + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +typedef int int4 __attribute__((ext_vector_type(4))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef half half4 __attribute__((ext_vector_type(4))); + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_r(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(1, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_g(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(2, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_b(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(4, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_a(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(8, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_lz_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1d_v4f32_f32(i32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_1d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_l_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_1d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_d_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_1d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_lz_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_l_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, 103); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_d_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2d_v4f32_f32(i32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2d_v4f32_f32' must be a constant integer}} +} +float4 test_amdgcn_image_sample_lz_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_3d_v4f32_f32(i32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_3d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_l_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_3d_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_3d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_d_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_3d_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_3d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_lz_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_cube_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_cube_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_l_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_cube_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_cube_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_lz_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1darray_v4f32_f32(1, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_l_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_1darray_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_d_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_1darray_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_lz_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_l_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2darray_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_d_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2darray_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2darray_v4f32_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_lz_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1d_v4f16_f32(23, f32, tex, vec4i32, 0, i32, 11); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_1d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_l_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1d_v4f16_f32(i32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_1d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_d_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1d_v4f16_f32(i32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_1d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_lz_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_l_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_d_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_lz_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_3d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_3d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_l_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_3d_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_3d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_d_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_3d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_3d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_lz_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_cube_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_cube_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_l_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_cube_v4f16_f32(i32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_cube_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_lz_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1darray_v4f16_f32(i32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_l_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1darray_v4f16_f32(i32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_1darray_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_d_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_1darray_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_lz_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_l_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2darray_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_d_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2darray_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2darray_v4f16_f32' must be a constant integer}} +} + +float test_amdgcn_image_sample_lz_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2d_f32_f32(1, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2d_f32_f32' must be a constant integer}} +} + +float test_amdgcn_image_sample_l_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2d_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2d_f32_f32' must be a constant integer}} +} + +float test_amdgcn_image_sample_d_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2d_f32_f32(1, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2d_f32_f32' must be a constant integer}} +} + +float test_amdgcn_image_sample_lz_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2darray_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2darray_f32_f32' must be a constant integer}} +} + +float test_amdgcn_image_sample_l_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2darray_f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2darray_f32_f32' must be a constant integer}} +} + +float test_amdgcn_image_sample_d_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2darray_f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2darray_f32_f32' must be a constant integer}} +} diff --git a/clang/test/SemaOpenCL/builtins-extended-image-param-gfx942-err.cl b/clang/test/SemaOpenCL/builtins-extended-image-param-gfx942-err.cl new file mode 100644 index 000000000000..e60f8c70dc7c --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-extended-image-param-gfx942-err.cl @@ -0,0 +1,227 @@ +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx942 -verify=GFX94 -S -o - %s +// REQUIRES: amdgpu-registered-target + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +typedef int int4 __attribute__((ext_vector_type(4))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef half half4 __attribute__((ext_vector_type(4))); + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_r(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(1, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_gather4_lz_2d_v4f32_f32_r' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_g(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(2, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_gather4_lz_2d_v4f32_f32_g' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_b(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(4, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_gather4_lz_2d_v4f32_f32_b' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_a(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(8, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_gather4_lz_2d_v4f32_f32_a' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_lz_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1d_v4f32_f32(105, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_1d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_l_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_1d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_d_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_1d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_lz_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_l_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2d_v4f32_f32(10, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_d_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2d_v4f32_f32(105, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2d_v4f32_f32' needs target feature extended-image-insts}} +} +float4 test_amdgcn_image_sample_lz_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_3d_v4f32_f32(105, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_3d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_l_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_3d_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_3d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_d_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_3d_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_3d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_lz_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_cube_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_cube_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_l_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_cube_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_cube_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_lz_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1darray_v4f32_f32(1, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_1darray_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_l_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_1darray_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_d_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_1darray_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_lz_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2darray_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_l_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2darray_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_d_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2darray_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2darray_v4f32_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_lz_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1d_v4f16_f32(105, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_1d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_l_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1d_v4f16_f32(105, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_1d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_d_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1d_v4f16_f32(105, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_1d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_lz_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_l_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_d_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_lz_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_3d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_3d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_l_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_3d_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_3d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_d_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_3d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_3d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_lz_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_cube_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_cube_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_l_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_cube_v4f16_f32(105, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_cube_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_lz_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1darray_v4f16_f32(105, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_1darray_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_l_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1darray_v4f16_f32(105, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_1darray_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_d_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_1darray_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_lz_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2darray_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_l_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2darray_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_d_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2darray_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2darray_v4f16_f32' needs target feature extended-image-insts}} +} + +float test_amdgcn_image_sample_lz_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2d_f32_f32(1, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2d_f32_f32' needs target feature extended-image-insts}} +} + +float test_amdgcn_image_sample_l_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2d_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2d_f32_f32' needs target feature extended-image-insts}} +} + +float test_amdgcn_image_sample_d_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2d_f32_f32(1, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2d_f32_f32' needs target feature extended-image-insts}} +} + +float test_amdgcn_image_sample_lz_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2darray_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2darray_f32_f32' needs target feature extended-image-insts}} +} + +float test_amdgcn_image_sample_l_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2darray_f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2darray_f32_f32' needs target feature extended-image-insts}} +} + +float test_amdgcn_image_sample_d_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2darray_f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2darray_f32_f32' needs target feature extended-image-insts}} +} diff --git a/clang/unittests/Format/AlignBracketsTest.cpp b/clang/unittests/Format/AlignBracketsTest.cpp index ea8db51a4d18..10ca5fb7da1c 100644 --- a/clang/unittests/Format/AlignBracketsTest.cpp +++ b/clang/unittests/Format/AlignBracketsTest.cpp @@ -28,7 +28,7 @@ TEST_F(AlignBracketsTest, AlignsAfterOpenBracket) { "SomeLongVariableName->someFunction(foooooooo(aaaaaaaaaaaaaaa,\n" " aaaaaaaaaaaaaaaaaaaaa));"); FormatStyle Style = getLLVMStyle(); - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + Style.AlignAfterOpenBracket = false; verifyFormat("void aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(\n" " aaaaaaaaaaa aaaaaaaa, aaaaaaaaa aaaaaaa) {}", Style); @@ -64,7 +64,7 @@ TEST_F(AlignBracketsTest, AlignsAfterOpenBracket) { Style); Style.ColumnLimit = 80; - Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + Style.BreakAfterOpenBracketFunction = true; Style.BinPackArguments = false; Style.BinPackParameters = FormatStyle::BPPS_OnePerLine; verifyFormat("void aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(\n" @@ -115,7 +115,9 @@ TEST_F(AlignBracketsTest, AlignsAfterOpenBracket) { " XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXZZZZZZZZZZZZZZZZZZZZZZZZZ()));", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + Style.BreakAfterOpenBracketFunction = true; + Style.BreakBeforeCloseBracketFunction = true; + Style.BreakBeforeCloseBracketBracedList = true; Style.BinPackArguments = false; Style.BinPackParameters = FormatStyle::BPPS_OnePerLine; verifyFormat("void aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(\n" @@ -254,7 +256,8 @@ TEST_F(AlignBracketsTest, AlignAfterOpenBracketBlockIndent) { "argument5));", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + Style.BreakAfterOpenBracketFunction = true; + Style.BreakBeforeCloseBracketFunction = true; verifyFormat(Short, Style); verifyFormat( @@ -378,7 +381,8 @@ TEST_F(AlignBracketsTest, AlignAfterOpenBracketBlockIndentIfStatement) { "}", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + Style.BreakAfterOpenBracketFunction = true; + Style.BreakBeforeCloseBracketFunction = true; verifyFormat("if (foo()) {\n" " return;\n" @@ -440,7 +444,8 @@ TEST_F(AlignBracketsTest, AlignAfterOpenBracketBlockIndentForStatement) { "}", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + Style.BreakAfterOpenBracketFunction = true; + Style.BreakBeforeCloseBracketFunction = true; verifyFormat("for (int i = 0; i < 5; ++i) {\n" " doSomething();\n" @@ -457,7 +462,8 @@ TEST_F(AlignBracketsTest, AlignAfterOpenBracketBlockIndentForStatement) { TEST_F(AlignBracketsTest, AlignAfterOpenBracketBlockIndentInitializers) { auto Style = getLLVMStyleWithColumns(60); - Style.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + Style.BreakAfterOpenBracketBracedList = true; + Style.BreakBeforeCloseBracketBracedList = true; // Aggregate initialization. verifyFormat("int LooooooooooooooooooooooooongVariable[2] = {\n" " 10000000, 20000000\n" @@ -611,13 +617,13 @@ TEST_F(AlignBracketsTest, AllowAllArgumentsOnNextLineDontAlign) { StringRef Input = "functionCall(paramA, paramB, paramC);\n" "void functionDecl(int A, int B, int C);"; Style.AllowAllArgumentsOnNextLine = false; - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + Style.AlignAfterOpenBracket = false; verifyFormat(StringRef("functionCall(paramA, paramB,\n" " paramC);\n" "void functionDecl(int A, int B,\n" " int C);"), Input, Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_Align; + Style.AlignAfterOpenBracket = true; verifyFormat(StringRef("functionCall(paramA, paramB,\n" " paramC);\n" "void functionDecl(int A, int B,\n" @@ -625,13 +631,14 @@ TEST_F(AlignBracketsTest, AllowAllArgumentsOnNextLineDontAlign) { Input, Style); // However, BAS_AlwaysBreak and BAS_BlockIndent should take precedence over // AllowAllArgumentsOnNextLine. - Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + Style.BreakAfterOpenBracketFunction = true; verifyFormat(StringRef("functionCall(\n" " paramA, paramB, paramC);\n" "void functionDecl(\n" " int A, int B, int C);"), Input, Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + Style.BreakAfterOpenBracketFunction = true; + Style.BreakBeforeCloseBracketFunction = true; verifyFormat("functionCall(\n" " paramA, paramB, paramC\n" ");\n" @@ -639,11 +646,12 @@ TEST_F(AlignBracketsTest, AllowAllArgumentsOnNextLineDontAlign) { " int A, int B, int C\n" ");", Input, Style); + Style.BreakBeforeCloseBracketFunction = false; // When AllowAllArgumentsOnNextLine is set, we prefer breaking before the // first argument. Style.AllowAllArgumentsOnNextLine = true; - Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + Style.BreakAfterOpenBracketFunction = true; verifyFormat(StringRef("functionCall(\n" " paramA, paramB, paramC);\n" "void functionDecl(\n" @@ -651,13 +659,14 @@ TEST_F(AlignBracketsTest, AllowAllArgumentsOnNextLineDontAlign) { Input, Style); // It wouldn't fit on one line with aligned parameters so this setting // doesn't change anything for BAS_Align. - Style.AlignAfterOpenBracket = FormatStyle::BAS_Align; + Style.AlignAfterOpenBracket = true; + Style.BreakAfterOpenBracketFunction = false; verifyFormat(StringRef("functionCall(paramA, paramB,\n" " paramC);\n" "void functionDecl(int A, int B,\n" " int C);"), Input, Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + Style.BreakAfterOpenBracketFunction = true; verifyFormat(StringRef("functionCall(\n" " paramA, paramB, paramC);\n" "void functionDecl(\n" @@ -678,13 +687,14 @@ TEST_F(AlignBracketsTest, FormatsDeclarationBreakAlways) { // Ensure AlignAfterOpenBracket interacts correctly with BinPackParameters set // to BPPS_AlwaysOnePerLine. - BreakAlways.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + BreakAlways.BreakAfterOpenBracketFunction = true; verifyFormat( "void someLongFunctionName(\n" " int aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa,\n" " int b);", BreakAlways); - BreakAlways.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + BreakAlways.BreakAfterOpenBracketFunction = true; + BreakAlways.BreakBeforeCloseBracketFunction = true; verifyFormat( "void someLongFunctionName(\n" " int aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa,\n" @@ -734,7 +744,7 @@ TEST_F(AlignBracketsTest, FormatsDefinitionBreakAlways) { // Ensure AlignAfterOpenBracket interacts correctly with BinPackParameters set // to BPPS_AlwaysOnePerLine. - BreakAlways.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + BreakAlways.BreakAfterOpenBracketFunction = true; verifyFormat( "void someLongFunctionName(\n" " int aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa,\n" @@ -743,7 +753,8 @@ TEST_F(AlignBracketsTest, FormatsDefinitionBreakAlways) { " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa, b);\n" "}", BreakAlways); - BreakAlways.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + BreakAlways.BreakAfterOpenBracketFunction = true; + BreakAlways.BreakBeforeCloseBracketFunction = true; verifyFormat( "void someLongFunctionName(\n" " int aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa,\n" @@ -761,17 +772,17 @@ TEST_F(AlignBracketsTest, ParenthesesAndOperandAlignment) { verifyFormat("int a = f(aaaaaaaaaaaaaaaaaaaaaa &&\n" " bbbbbbbbbbbbbbbbbbbbbb);", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_Align; + Style.AlignAfterOpenBracket = true; Style.AlignOperands = FormatStyle::OAS_DontAlign; verifyFormat("int a = f(aaaaaaaaaaaaaaaaaaaaaa &&\n" " bbbbbbbbbbbbbbbbbbbbbb);", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + Style.AlignAfterOpenBracket = false; Style.AlignOperands = FormatStyle::OAS_Align; verifyFormat("int a = f(aaaaaaaaaaaaaaaaaaaaaa &&\n" " bbbbbbbbbbbbbbbbbbbbbb);", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + Style.AlignAfterOpenBracket = false; Style.AlignOperands = FormatStyle::OAS_DontAlign; verifyFormat("int a = f(aaaaaaaaaaaaaaaaaaaaaa &&\n" " bbbbbbbbbbbbbbbbbbbbbb);", @@ -781,7 +792,10 @@ TEST_F(AlignBracketsTest, ParenthesesAndOperandAlignment) { TEST_F(AlignBracketsTest, BlockIndentAndNamespace) { auto Style = getLLVMStyleWithColumns(120); Style.AllowShortNamespacesOnASingleLine = true; - Style.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + Style.BreakAfterOpenBracketFunction = true; + Style.BreakAfterOpenBracketBracedList = true; + Style.BreakBeforeCloseBracketFunction = true; + Style.BreakBeforeCloseBracketBracedList = true; verifyNoCrash( "namespace {\n" diff --git a/clang/unittests/Format/ConfigParseTest.cpp b/clang/unittests/Format/ConfigParseTest.cpp index 6488e38badee..43b21176962e 100644 --- a/clang/unittests/Format/ConfigParseTest.cpp +++ b/clang/unittests/Format/ConfigParseTest.cpp @@ -172,6 +172,16 @@ TEST(ConfigParseTest, ParsesConfigurationBools) { CHECK_PARSE_BOOL(BinPackLongBracedList); CHECK_PARSE_BOOL(BreakAdjacentStringLiterals); CHECK_PARSE_BOOL(BreakAfterJavaFieldAnnotations); + CHECK_PARSE_BOOL(BreakAfterOpenBracketBracedList); + CHECK_PARSE_BOOL(BreakAfterOpenBracketFunction); + CHECK_PARSE_BOOL(BreakAfterOpenBracketIf); + CHECK_PARSE_BOOL(BreakAfterOpenBracketLoop); + CHECK_PARSE_BOOL(BreakAfterOpenBracketSwitch); + CHECK_PARSE_BOOL(BreakBeforeCloseBracketBracedList); + CHECK_PARSE_BOOL(BreakBeforeCloseBracketFunction); + CHECK_PARSE_BOOL(BreakBeforeCloseBracketIf); + CHECK_PARSE_BOOL(BreakBeforeCloseBracketLoop); + CHECK_PARSE_BOOL(BreakBeforeCloseBracketSwitch); CHECK_PARSE_BOOL(BreakBeforeTemplateCloser); CHECK_PARSE_BOOL(BreakBeforeTernaryOperators); CHECK_PARSE_BOOL(BreakStringLiterals); @@ -533,20 +543,23 @@ TEST(ConfigParseTest, ParsesConfiguration) { CHECK_PARSE("EnumTrailingComma: Remove", EnumTrailingComma, FormatStyle::ETC_Remove); - Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; - CHECK_PARSE("AlignAfterOpenBracket: Align", AlignAfterOpenBracket, - FormatStyle::BAS_Align); - CHECK_PARSE("AlignAfterOpenBracket: DontAlign", AlignAfterOpenBracket, - FormatStyle::BAS_DontAlign); + Style.AlignAfterOpenBracket = false; + CHECK_PARSE("AlignAfterOpenBracket: Align", AlignAfterOpenBracket, true); + CHECK_PARSE("AlignAfterOpenBracket: DontAlign", AlignAfterOpenBracket, false); + // For backward compatibility: CHECK_PARSE("AlignAfterOpenBracket: AlwaysBreak", AlignAfterOpenBracket, - FormatStyle::BAS_AlwaysBreak); + true); + CHECK_PARSE("AlignAfterOpenBracket: AlwaysBreak\n" + "BreakAfterOpenBracketIf: false", + BreakAfterOpenBracketIf, false); + CHECK_PARSE("BreakAfterOpenBracketLoop: true\n" + "AlignAfterOpenBracket: AlwaysBreak", + BreakAfterOpenBracketLoop, true); + CHECK_PARSE("AlignAfterOpenBracket: false", AlignAfterOpenBracket, false); CHECK_PARSE("AlignAfterOpenBracket: BlockIndent", AlignAfterOpenBracket, - FormatStyle::BAS_BlockIndent); - // For backward compatibility: - CHECK_PARSE("AlignAfterOpenBracket: false", AlignAfterOpenBracket, - FormatStyle::BAS_DontAlign); - CHECK_PARSE("AlignAfterOpenBracket: true", AlignAfterOpenBracket, - FormatStyle::BAS_Align); + true); + Style.AlignAfterOpenBracket = false; + CHECK_PARSE("AlignAfterOpenBracket: true", AlignAfterOpenBracket, true); Style.AlignEscapedNewlines = FormatStyle::ENAS_Left; CHECK_PARSE("AlignEscapedNewlines: DontAlign", AlignEscapedNewlines, diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index d45babe1b82a..ca9e7925e5e9 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -5126,7 +5126,8 @@ TEST_F(FormatTest, DesignatedInitializers) { TEST_F(FormatTest, BracedInitializerIndentWidth) { auto Style = getLLVMStyleWithColumns(60); Style.BinPackArguments = true; - Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + Style.BreakAfterOpenBracketFunction = true; + Style.BreakAfterOpenBracketBracedList = true; Style.BracedInitializerIndentWidth = 6; // Non-initializing braces are unaffected by BracedInitializerIndentWidth. @@ -5302,7 +5303,8 @@ TEST_F(FormatTest, BracedInitializerIndentWidth) { Style); // Aligning after open braces unaffected by BracedInitializerIndentWidth. - Style.AlignAfterOpenBracket = FormatStyle::BAS_Align; + Style.AlignAfterOpenBracket = true; + Style.BreakAfterOpenBracketBracedList = false; verifyFormat("SomeStruct s{\"xxxxxxxxxxxxx\", \"yyyyyyyyyyyyy\",\n" " \"zzzzzzzzzzzzz\"};", Style); @@ -7459,7 +7461,7 @@ TEST_F(FormatTest, ExpressionIndentationBreakingBeforeOperators) { Style.IndentWidth = 4; Style.TabWidth = 4; Style.UseTab = FormatStyle::UT_Always; - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + Style.AlignAfterOpenBracket = false; Style.AlignOperands = FormatStyle::OAS_DontAlign; verifyFormat("return someVeryVeryLongConditionThatBarelyFitsOnALine\n" "\t&& (someOtherLongishConditionPart1\n" @@ -7470,7 +7472,7 @@ TEST_F(FormatTest, ExpressionIndentationBreakingBeforeOperators) { Style); Style = getLLVMStyleWithColumns(20); - Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + Style.BreakAfterOpenBracketFunction = true; Style.BinPackParameters = FormatStyle::BPPS_OnePerLine; Style.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment; Style.ContinuationIndentWidth = 2; @@ -7632,7 +7634,7 @@ TEST_F(FormatTest, NoOperandAlignment) { " * cccccccccccccccccccccccccccccccccccc;", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + Style.AlignAfterOpenBracket = false; verifyFormat("return (a > b\n" " // comment1\n" " // comment2\n" @@ -11248,7 +11250,7 @@ TEST_F(FormatTest, BreakBeforeTemplateCloser) { TEST_F(FormatTest, WrapsTemplateParameters) { FormatStyle Style = getLLVMStyle(); - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + Style.AlignAfterOpenBracket = false; Style.BreakBeforeBinaryOperators = FormatStyle::BOS_None; verifyFormat( "template <typename... a> struct q {};\n" @@ -11256,7 +11258,7 @@ TEST_F(FormatTest, WrapsTemplateParameters) { " aaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaa>\n" " y;", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + Style.AlignAfterOpenBracket = false; Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; verifyFormat( "template <typename... a> struct r {};\n" @@ -11264,7 +11266,7 @@ TEST_F(FormatTest, WrapsTemplateParameters) { " aaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaa>\n" " y;", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + Style.BreakAfterOpenBracketFunction = true; Style.BreakBeforeBinaryOperators = FormatStyle::BOS_None; verifyFormat("template <typename... a> struct s {};\n" "extern s<\n" @@ -11274,7 +11276,7 @@ TEST_F(FormatTest, WrapsTemplateParameters) { "aaaaaaaaaaaaaaaaaaaaaa>\n" " y;", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + Style.BreakAfterOpenBracketFunction = true; Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; verifyFormat("template <typename... a> struct t {};\n" "extern t<\n" @@ -14302,7 +14304,7 @@ TEST_F(FormatTest, LayoutCxx11BraceInitializers) { "};", NoBinPacking); - NoBinPacking.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + NoBinPacking.BreakAfterOpenBracketBracedList = true; verifyFormat("static uint8 CddDp83848Reg[] = {\n" " CDDDP83848_BMCR_REGISTER,\n" " CDDDP83848_BMSR_REGISTER,\n" @@ -15972,13 +15974,14 @@ TEST_F(FormatTest, BreaksStringLiteralOperands) { // In a function call with two operands, with AlignAfterOpenBracket enabled, // the first must be broken with a line break before it. FormatStyle Style = getLLVMStyleWithColumns(25); - Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + Style.BreakAfterOpenBracketFunction = true; verifyFormat("someFunction(\n" " \"long long long \"\n" " \"long\",\n" " a);", "someFunction(\"long long long long\", a);", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + Style.BreakAfterOpenBracketFunction = true; + Style.BreakBeforeCloseBracketFunction = true; verifyFormat("someFunction(\n" " \"long long long \"\n" " \"long\",\n" @@ -17773,7 +17776,7 @@ TEST_F(FormatTest, ConfigurableSpacesInParens) { Spaces.ColumnLimit = 80; Spaces.IndentWidth = 4; - Spaces.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + Spaces.BreakAfterOpenBracketFunction = true; verifyFormat("void foo( ) {\n" " size_t foo = (*(function))(\n" " Foooo, Barrrrr, Foooo, Barrrr, FoooooooooLooooong, " @@ -17798,7 +17801,8 @@ TEST_F(FormatTest, ConfigurableSpacesInParens) { "}", Spaces); - Spaces.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + Spaces.BreakAfterOpenBracketFunction = true; + Spaces.BreakBeforeCloseBracketFunction = true; verifyFormat("void foo( ) {\n" " size_t foo = (*(function))(\n" " Foooo, Barrrrr, Foooo, Barrrr, FoooooooooLooooong, " @@ -22827,7 +22831,7 @@ TEST_F(FormatTest, ConstructorInitializerIndentWidth) { ": aaaaaaaaaaaaa(aaaaaaaaaaaaaa), aaaaaaaaaaaaa(aaaaaaaaaaaaaa),\n" " aaaaaaaaaaaaa(aaaaaaaaaaaaaa) {}", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + Style.BreakAfterOpenBracketFunction = true; verifyFormat( "SomeLongTemplateVariableName<\n" " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa>", @@ -24082,7 +24086,7 @@ TEST_F(FormatTest, FormatsLambdas) { " return aFunkyFunctionCall(qux);\n" " }} {}", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + Style.BreakAfterOpenBracketFunction = true; // FIXME: The following test should pass, but fails at the time of writing. #if 0 // As long as all the non-lambda arguments fit on a single line, AlwaysBreak diff --git a/clang/unittests/Format/FormatTestJS.cpp b/clang/unittests/Format/FormatTestJS.cpp index 91577b9a4916..4847151c14b3 100644 --- a/clang/unittests/Format/FormatTestJS.cpp +++ b/clang/unittests/Format/FormatTestJS.cpp @@ -2883,7 +2883,7 @@ TEST_F(FormatTestJS, DontBreakFieldsAsGoToLabels) { TEST_F(FormatTestJS, BreakAfterOpenBracket) { auto Style = getGoogleStyle(FormatStyle::LK_JavaScript); - EXPECT_EQ(Style.AlignAfterOpenBracket, FormatStyle::BAS_AlwaysBreak); + EXPECT_EQ(Style.BreakAfterOpenBracketFunction, true); verifyFormat("ctrl.onCopy(/** @type {!WizEvent}*/ (\n" " {event, targetElement: {el: () => selectedElement}}));", Style); diff --git a/clang/unittests/Tooling/RangeSelectorTest.cpp b/clang/unittests/Tooling/RangeSelectorTest.cpp index adf5e74ea319..a1fcbb023832 100644 --- a/clang/unittests/Tooling/RangeSelectorTest.cpp +++ b/clang/unittests/Tooling/RangeSelectorTest.cpp @@ -527,6 +527,31 @@ TEST(RangeSelectorTest, NameOpDeclRefError) { AllOf(HasSubstr(Ref), HasSubstr("requires property 'identifier'"))))); } +TEST(RangeSelectorTest, NameOpDeclInMacroArg) { + StringRef Code = R"cc( + #define MACRO(name) int name; + MACRO(x) + )cc"; + const char *ID = "id"; + TestMatch Match = matchCode(Code, varDecl().bind(ID)); + EXPECT_THAT_EXPECTED(select(name(ID), Match), HasValue("x")); +} + +TEST(RangeSelectorTest, NameOpDeclInMacroBodyError) { + StringRef Code = R"cc( + #define MACRO int x; + MACRO + )cc"; + const char *ID = "id"; + TestMatch Match = matchCode(Code, varDecl().bind(ID)); + EXPECT_THAT_EXPECTED( + name(ID)(Match.Result), + Failed<StringError>(testing::Property( + &StringError::getMessage, + AllOf(HasSubstr("range selected by name(node id="), + HasSubstr("' is different from decl name 'x'"))))); +} + TEST(RangeSelectorTest, CallArgsOp) { const StringRef Code = R"cc( struct C { diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html index ae9b28ee625c..0312c9dfc066 100755 --- a/clang/www/cxx_dr_status.html +++ b/clang/www/cxx_dr_status.html @@ -81,7 +81,7 @@ <td><a href="https://cplusplus.github.io/CWG/issues/6.html">6</a></td> <td>NAD</td> <td>Should the optimization that allows a class object to alias another object also allow the case of a parameter in an inline function to alias its argument?</td> - <td class="unknown" align="center">Unknown</td> + <td class="full" align="center">Yes</td> </tr> <tr id="7"> <td><a href="https://cplusplus.github.io/CWG/issues/7.html">7</a></td> @@ -1318,7 +1318,7 @@ accessible?</td> <td><a href="https://cplusplus.github.io/CWG/issues/212.html">212</a></td> <td>CD4</td> <td>Implicit instantiation is not described clearly enough</td> - <td class="unknown" align="center">Unknown</td> + <td class="full" align="center">Yes</td> </tr> <tr id="213"> <td><a href="https://cplusplus.github.io/CWG/issues/213.html">213</a></td> @@ -1438,7 +1438,7 @@ accessible?</td> <td><a href="https://cplusplus.github.io/CWG/issues/232.html">232</a></td> <td>NAD</td> <td>Is indirection through a null pointer undefined behavior?</td> - <td class="unknown" align="center">Unknown</td> + <td class="none" align="center">Duplicate of <a href="#2823">2823</a></td> </tr> <tr id="233"> <td><a href="https://cplusplus.github.io/CWG/issues/233.html">233</a></td> @@ -16790,7 +16790,7 @@ objects</td> <td><a href="https://cplusplus.github.io/CWG/issues/2823.html">2823</a></td> <td>CD7</td> <td>Implicit undefined behavior when dereferencing pointers</td> - <td class="unknown" align="center">Unknown</td> + <td class="none" align="center">No</td> </tr> <tr id="2824"> <td><a href="https://cplusplus.github.io/CWG/issues/2824.html">2824</a></td> diff --git a/compiler-rt/lib/lsan/lsan_allocator.h b/compiler-rt/lib/lsan/lsan_allocator.h index 556b9f56a4a4..2d0ea0b46fe0 100644 --- a/compiler-rt/lib/lsan/lsan_allocator.h +++ b/compiler-rt/lib/lsan/lsan_allocator.h @@ -93,6 +93,10 @@ using LSanSizeClassMap = DefaultSizeClassMap; const uptr kAllocatorSpace = 0x600000000000ULL; const uptr kAllocatorSize = 0x40000000000ULL; // 4T. using LSanSizeClassMap = DefaultSizeClassMap; +# elif SANITIZER_ANDROID && defined(__aarch64__) +const uptr kAllocatorSpace = 0x3000000000ULL; +const uptr kAllocatorSize = 0x2000000000ULL; +using LSanSizeClassMap = VeryCompactSizeClassMap; # else const uptr kAllocatorSpace = 0x500000000000ULL; const uptr kAllocatorSize = 0x40000000000ULL; // 4T. diff --git a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate-warnings.c b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate-warnings.c index 5069c6340b64..25022f241a6d 100644 --- a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate-warnings.c +++ b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate-warnings.c @@ -1,6 +1,6 @@ // Disable full debug info and verify that we get warnings during merging -// RUN: %clang_pgogen -o %t -gline-tables-only -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp +// RUN: %clang_pgogen -o %t -gline-tables-only -mllvm --profile-correlate=debug-info -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp // RUN: env LLVM_PROFILE_FILE=%t.proflite %run %t // RUN: llvm-profdata merge -o %t.profdata --debug-info=%t %t.proflite --max-debug-info-correlation-warnings=2 2>&1 >/dev/null | FileCheck %s --check-prefixes=CHECK,LIMIT --implicit-check-not=warning // RUN: llvm-profdata merge -o %t.profdata --debug-info=%t %t.proflite --max-debug-info-correlation-warnings=0 2>&1 >/dev/null | FileCheck %s --check-prefixes=CHECK,NOLIMIT --implicit-check-not=warning diff --git a/compiler-rt/test/tsan/ignore_lib0.cpp b/compiler-rt/test/tsan/ignore_lib0.cpp index bca4ea5e582a..9c4919022b51 100644 --- a/compiler-rt/test/tsan/ignore_lib0.cpp +++ b/compiler-rt/test/tsan/ignore_lib0.cpp @@ -5,7 +5,7 @@ // RUN: %clangxx_tsan -O1 %s -L%t-dir -lignore_lib0 %link_libcxx_tsan -o %t // RUN: echo running w/o suppressions: // RUN: echo -n %t-dir > %t.ld_library_path -// RUN: python -c "if 'LD_LIBRARY_PATH' in __import__('os').environ: print(':' + __import__('os').environ['LD_LIBRARY_PATH'])" | tr -d '\n' >> %t.ld_library_path +// RUN: %python -c "if 'LD_LIBRARY_PATH' in __import__('os').environ: print(':' + __import__('os').environ['LD_LIBRARY_PATH'], end='')" >> %t.ld_library_path // RUN: env LD_LIBRARY_PATH=%{readfile:%t.ld_library_path} %deflake %run %t | FileCheck %s --check-prefix=CHECK-NOSUPP // RUN: echo running with suppressions: // RUN: env LD_LIBRARY_PATH=%{readfile:%t.ld_library_path} %env_tsan_opts=suppressions='%s.supp' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP diff --git a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h index 4817ed933ba0..3167c554abbd 100644 --- a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h +++ b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h @@ -60,6 +60,8 @@ struct OpenACCMappableModel getOffsetInBytes(mlir::Type type, mlir::Value var, mlir::ValueRange accBounds, const mlir::DataLayout &dataLayout) const; + bool hasUnknownDimensions(mlir::Type type) const; + llvm::SmallVector<mlir::Value> generateAccBounds(mlir::Type type, mlir::Value var, mlir::OpBuilder &builder) const; diff --git a/flang/include/flang/Semantics/dump-expr.h b/flang/include/flang/Semantics/dump-expr.h index 2dbd4cb60be5..5a78e13b19e5 100644 --- a/flang/include/flang/Semantics/dump-expr.h +++ b/flang/include/flang/Semantics/dump-expr.h @@ -48,10 +48,11 @@ private: // "... [with T = xyz; std::string_view = ...]" #ifdef __clang__ std::string_view front("[T = "); + std::string_view back("]"); #else std::string_view front("[with T = "); -#endif std::string_view back("; std::string_view ="); +#endif #elif defined(_MSC_VER) #define DUMP_EXPR_SHOW_TYPE diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp index ed9e41c74375..ae0f5fb8197f 100644 --- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp +++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp @@ -193,6 +193,28 @@ OpenACCMappableModel<fir::PointerType>::getOffsetInBytes( mlir::Type type, mlir::Value var, mlir::ValueRange accBounds, const mlir::DataLayout &dataLayout) const; +template <typename Ty> +bool OpenACCMappableModel<Ty>::hasUnknownDimensions(mlir::Type type) const { + assert(fir::isa_ref_type(type) && "expected FIR reference type"); + return fir::hasDynamicSize(fir::unwrapRefType(type)); +} + +template bool OpenACCMappableModel<fir::ReferenceType>::hasUnknownDimensions( + mlir::Type type) const; + +template bool OpenACCMappableModel<fir::HeapType>::hasUnknownDimensions( + mlir::Type type) const; + +template bool OpenACCMappableModel<fir::PointerType>::hasUnknownDimensions( + mlir::Type type) const; + +template <> +bool OpenACCMappableModel<fir::BaseBoxType>::hasUnknownDimensions( + mlir::Type type) const { + // Descriptor-based entities have dimensions encoded. + return false; +} + static llvm::SmallVector<mlir::Value> generateSeqTyAccBounds(fir::SequenceType seqType, mlir::Value var, mlir::OpBuilder &builder) { diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp index 4739da0676fa..fd69404f313d 100644 --- a/flang/lib/Parser/prescan.cpp +++ b/flang/lib/Parser/prescan.cpp @@ -557,7 +557,7 @@ bool Prescanner::MustSkipToEndOfLine() const { return true; // skip over ignored columns in right margin (73:80) } else if (*at_ == '!' && !inCharLiteral_ && (!inFixedForm_ || tabInCurrentLine_ || column_ != 6)) { - return !IsCompilerDirectiveSentinel(at_); + return !IsCompilerDirectiveSentinel(at_ + 1); } else { return false; } diff --git a/flang/test/Fir/OpenACC/openacc-mappable.fir b/flang/test/Fir/OpenACC/openacc-mappable.fir index 05df35a48290..00fe2574da62 100644 --- a/flang/test/Fir/OpenACC/openacc-mappable.fir +++ b/flang/test/Fir/OpenACC/openacc-mappable.fir @@ -21,11 +21,13 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<f16 = dense<16> : vector<2xi64>, // CHECK: Mappable: !fir.box<!fir.array<10xf32>> // CHECK: Type category: array // CHECK: Size: 40 + // CHECK: Has unknown dimensions: false // CHECK: Visiting: %{{.*}} = acc.copyin varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {name = "arr", structured = false} // CHECK: Pointer-like and Mappable: !fir.ref<!fir.array<10xf32>> // CHECK: Type category: array // CHECK: Size: 40 + // CHECK: Has unknown dimensions: false // This second test exercises argument of explicit-shape arrays in following forms: // `real :: arr1(nn), arr2(2:nn), arr3(10)` @@ -62,6 +64,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<f16 = dense<16> : vector<2xi64>, // CHECK: Visiting: %{{.*}} = acc.copyin varPtr(%{{.*}} : !fir.ref<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>> {name = "arr1", structured = false} // CHECK: Pointer-like and Mappable: !fir.ref<!fir.array<?xf32>> // CHECK: Type category: array + // CHECK: Has unknown dimensions: true // CHECK: Shape: %{{.*}} = fir.shape %[[EXTENT1:.*]] : (index) -> !fir.shape<1> // CHECK: Bound[0]: %{{.*}} = acc.bounds lowerbound(%[[LB1:.*]] : index) upperbound(%[[UB1:.*]] : index) extent(%{{.*}} : index) stride(%c1{{.*}} : index) startIdx(%c1{{.*}} : index) // CHECK: Lower bound: %[[LB1]] = arith.constant 0 : index @@ -70,6 +73,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<f16 = dense<16> : vector<2xi64>, // CHECK: Visiting: %{{.*}} = acc.copyin varPtr(%{{.*}} : !fir.ref<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>> {name = "arr2", structured = false} // CHECK: Pointer-like and Mappable: !fir.ref<!fir.array<?xf32>> // CHECK: Type category: array + // CHECK: Has unknown dimensions: true // CHECK: Shape: %{{.*}} = fir.shape_shift %c2{{.*}}, %[[EXTENT2:.*]] : (index, index) -> !fir.shapeshift<1> // CHECK: Bound[0]: %{{.*}} = acc.bounds lowerbound(%[[LB2:.*]] : index) upperbound(%[[UB2:.*]] : index) extent(%{{.*}} : index) stride(%c1{{.*}} : index) startIdx(%c2{{.*}} : index) // CHECK: Lower bound: %[[LB2]] = arith.constant 0 : index @@ -80,6 +84,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<f16 = dense<16> : vector<2xi64>, // CHECK: Type category: array // CHECK: Size: 40 // CHECK: Offset: 0 + // CHECK: Has unknown dimensions: false // CHECK: Shape: %{{.*}} = fir.shape %[[EXTENT3:.*]] : (index) -> !fir.shape<1> // CHECK: Bound[0]: %{{.*}} = acc.bounds lowerbound(%[[LB3:.*]] : index) upperbound(%[[UB3:.*]] : index) extent(%c10{{.*}} : index) stride(%c1{{.*}} : index) startIdx(%c1{{.*}} : index) // CHECK: Lower bound: %[[LB3]] = arith.constant 0 : index diff --git a/flang/test/Parser/inline-directives.f90 b/flang/test/Parser/inline-directives.f90 new file mode 100644 index 000000000000..24d4f95759a6 --- /dev/null +++ b/flang/test/Parser/inline-directives.f90 @@ -0,0 +1,29 @@ +! RUN: %flang_fc1 -fdebug-unparse %s 2>&1 | FileCheck %s + +! Test that checks whether compiler directives can be inlined without mistaking it as comment. + +module m +contains +#define MACRO(X) subroutine func1(X); real(2) :: X; !dir$ ignore_tkr(d) X; end subroutine func1; +MACRO(foo) + +!CHECK: SUBROUTINE func1 (foo) +!CHECK: !DIR$ IGNORE_TKR (d) foo +!CHECK: END SUBROUTINE func1 + + subroutine func2(foo) + real(2) :: foo; !dir$ ignore_tkr(d) foo; + end subroutine func2 + +!CHECK: SUBROUTINE func2 (foo) +!CHECK: !DIR$ IGNORE_TKR (d) foo +!CHECK: END SUBROUTINE func2 + + subroutine func3(foo) + real(2) :: foo; !dir$ ignore_tkr(d) foo; end subroutine func3; + +!CHECK: SUBROUTINE func3 (foo) +!CHECK: !DIR$ IGNORE_TKR (d) foo +!CHECK: END SUBROUTINE func3 + +end module diff --git a/flang/test/lib/OpenACC/TestOpenACCInterfaces.cpp b/flang/test/lib/OpenACC/TestOpenACCInterfaces.cpp index 9a80e3b1a9ae..072aee5ba269 100644 --- a/flang/test/lib/OpenACC/TestOpenACCInterfaces.cpp +++ b/flang/test/lib/OpenACC/TestOpenACCInterfaces.cpp @@ -100,6 +100,10 @@ struct TestFIROpenACCInterfaces } } + llvm::errs() << "\t\tHas unknown dimensions: " + << (mappableTy.hasUnknownDimensions() ? "true" : "false") + << "\n"; + if (auto declareOp = dyn_cast_if_present<hlfir::DeclareOp>(var.getDefiningOp())) { llvm::errs() << "\t\tShape: " << declareOp.getShape() << "\n"; diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt index 14718e2090bd..ae555a256ba6 100644 --- a/libc/CMakeLists.txt +++ b/libc/CMakeLists.txt @@ -363,7 +363,7 @@ elseif(LLVM_LIBC_FULL_BUILD) message(FATAL_ERROR "${LIBC_CONFIG_PATH}/headers.txt file not found and fullbuild requested.") endif() -# Check exclude.txt that appends to LIBC_EXCLUDE_ENTRYPOINTS list +# Check exclude.txt that appends to TARGET_LLVMLIBC_REMOVED_ENTRYPOINTS list if(EXISTS "${LIBC_CONFIG_PATH}/exclude.txt") include("${LIBC_CONFIG_PATH}/exclude.txt") endif() diff --git a/libc/config/linux/x86_64/exclude.txt b/libc/config/linux/x86_64/exclude.txt index 2c218b753b17..a0686310d21a 100644 --- a/libc/config/linux/x86_64/exclude.txt +++ b/libc/config/linux/x86_64/exclude.txt @@ -19,3 +19,11 @@ if(NOT has_sys_random) ) endif() endif() + +include(CheckSymbolExists) +check_symbol_exists(SYS_faccessat2 "sys/syscall.h" HAVE_SYS_FACCESSAT2) +if(NOT HAVE_SYS_FACCESSAT2) + list(APPEND TARGET_LLVMLIBC_REMOVED_ENTRYPOINTS + libc.src.unistd.faccessat + ) +endif() diff --git a/libc/include/llvm-libc-types/__barrier_type.h b/libc/include/llvm-libc-types/__barrier_type.h index 59712619e917..5752f832f04b 100644 --- a/libc/include/llvm-libc-types/__barrier_type.h +++ b/libc/include/llvm-libc-types/__barrier_type.h @@ -9,6 +9,8 @@ #ifndef LLVM_LIBC_TYPES__BARRIER_TYPE_H #define LLVM_LIBC_TYPES__BARRIER_TYPE_H +#include <stdbool.h> + typedef struct __attribute__((aligned(8 /* alignof (Barrier) */))) { unsigned expected; unsigned waiting; diff --git a/libc/include/llvm-libc-types/pthread_barrierattr_t.h b/libc/include/llvm-libc-types/pthread_barrierattr_t.h index 064be5bfb672..b62fdc0f72e1 100644 --- a/libc/include/llvm-libc-types/pthread_barrierattr_t.h +++ b/libc/include/llvm-libc-types/pthread_barrierattr_t.h @@ -9,6 +9,8 @@ #ifndef LLVM_LIBC_TYPES_PTHREAD_BARRIERATTR_T_H #define LLVM_LIBC_TYPES_PTHREAD_BARRIERATTR_T_H +#include <stdbool.h> + typedef struct { bool pshared; } pthread_barrierattr_t; diff --git a/libc/include/locale.yaml b/libc/include/locale.yaml index 4566984ad83a..3c3998eb07aa 100644 --- a/libc/include/locale.yaml +++ b/libc/include/locale.yaml @@ -1,7 +1,7 @@ header: locale.h header_template: locale.h.def macros: - - macro_name: NULL + - macro_name: "NULL" macro_header: null-macro.h types: - type_name: locale_t diff --git a/libc/include/stdio.yaml b/libc/include/stdio.yaml index 394437ba3bbc..c50b4ecb0bf0 100644 --- a/libc/include/stdio.yaml +++ b/libc/include/stdio.yaml @@ -1,7 +1,7 @@ header: stdio.h header_template: stdio.h.def macros: - - macro_name: NULL + - macro_name: "NULL" macro_header: null-macro.h - macro_name: stdout macro_value: stdout diff --git a/libc/include/stdlib.yaml b/libc/include/stdlib.yaml index 3b2ff13c684b..495eb7e1317b 100644 --- a/libc/include/stdlib.yaml +++ b/libc/include/stdlib.yaml @@ -5,7 +5,7 @@ standards: merge_yaml_files: - stdlib-malloc.yaml macros: - - macro_name: NULL + - macro_name: "NULL" macro_header: null-macro.h types: - type_name: __atexithandler_t diff --git a/libc/include/string.yaml b/libc/include/string.yaml index 0bf297ee747a..22010f4afa81 100644 --- a/libc/include/string.yaml +++ b/libc/include/string.yaml @@ -2,7 +2,7 @@ header: string.h standards: - stdc macros: - - macro_name: NULL + - macro_name: "NULL" macro_header: null-macro.h types: - type_name: locale_t diff --git a/libc/include/time.yaml b/libc/include/time.yaml index 2f8024298fad..88e50d128823 100644 --- a/libc/include/time.yaml +++ b/libc/include/time.yaml @@ -1,7 +1,7 @@ header: time.h header_template: time.h.def macros: - - macro_name: NULL + - macro_name: "NULL" macro_header: null-macro.h types: - type_name: struct_timeval diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml index b8a0a748cd3a..fb5b19b523b3 100644 --- a/libc/include/wchar.yaml +++ b/libc/include/wchar.yaml @@ -1,11 +1,15 @@ header: wchar.h header_template: wchar.h.def macros: - - macro_name: NULL + - macro_name: "NULL" macro_header: null-macro.h types: - type_name: FILE - type_name: size_t + # TODO: Remove this once we have a function declaration using "struct tm" + # (wcsftime). We're declaring it here now, since libc++ expects + # forward-declaration of "struct tm" in the <wchar.h> header. + - type_name: struct_tm - type_name: wint_t - type_name: wchar_t - type_name: mbstate_t @@ -188,8 +192,8 @@ functions: standards: - stdc return_type: wchar_t * - arguments: - - type: wchar_t *__restrict + arguments: + - type: wchar_t *__restrict - type: const wchar_t *__restrict - type: size_t - name: wmemmove @@ -212,7 +216,7 @@ functions: standards: - stdc return_type: wchar_t * - arguments: + arguments: - type: wchar_t *__restrict - type: const wchar_t *__restrict - name: wcslcat diff --git a/libc/src/fenv/CMakeLists.txt b/libc/src/fenv/CMakeLists.txt index c5431b1b9d55..f36884597796 100644 --- a/libc/src/fenv/CMakeLists.txt +++ b/libc/src/fenv/CMakeLists.txt @@ -6,8 +6,6 @@ add_entrypoint_object( fegetround.h DEPENDS libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -18,8 +16,6 @@ add_entrypoint_object( fesetround.h DEPENDS libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -30,8 +26,6 @@ add_entrypoint_object( feclearexcept.h DEPENDS libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -42,8 +36,6 @@ add_entrypoint_object( feraiseexcept.h DEPENDS libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -54,8 +46,6 @@ add_entrypoint_object( fetestexcept.h DEPENDS libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -67,8 +57,6 @@ add_entrypoint_object( DEPENDS libc.hdr.types.fexcept_t libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -80,8 +68,6 @@ add_entrypoint_object( DEPENDS libc.hdr.types.fenv_t libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -93,8 +79,6 @@ add_entrypoint_object( DEPENDS libc.hdr.types.fenv_t libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -107,8 +91,6 @@ add_entrypoint_object( libc.hdr.fenv_macros libc.hdr.types.fexcept_t libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -119,8 +101,6 @@ add_entrypoint_object( fesetexcept.h DEPENDS libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -133,8 +113,6 @@ add_entrypoint_object( libc.hdr.fenv_macros libc.hdr.types.fexcept_t libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -147,8 +125,6 @@ add_entrypoint_object( libc.hdr.fenv_macros libc.hdr.types.fenv_t libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -161,8 +137,6 @@ add_entrypoint_object( libc.hdr.fenv_macros libc.hdr.types.fenv_t libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -173,8 +147,6 @@ add_entrypoint_object( feenableexcept.h DEPENDS libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -185,8 +157,6 @@ add_entrypoint_object( fedisableexcept.h DEPENDS libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -197,6 +167,4 @@ add_entrypoint_object( fegetexcept.h DEPENDS libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) diff --git a/libc/src/math/amdgpu/CMakeLists.txt b/libc/src/math/amdgpu/CMakeLists.txt index e2cd3b99c303..d05d519b74b4 100644 --- a/libc/src/math/amdgpu/CMakeLists.txt +++ b/libc/src/math/amdgpu/CMakeLists.txt @@ -4,8 +4,6 @@ add_entrypoint_object( ceil.cpp HDRS ../ceil.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -14,8 +12,6 @@ add_entrypoint_object( ceilf.cpp HDRS ../ceilf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -24,8 +20,6 @@ add_entrypoint_object( copysign.cpp HDRS ../copysign.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -34,8 +28,6 @@ add_entrypoint_object( copysignf.cpp HDRS ../copysignf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -44,8 +36,6 @@ add_entrypoint_object( fabs.cpp HDRS ../fabs.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -54,8 +44,6 @@ add_entrypoint_object( fabsf.cpp HDRS ../fabsf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -64,8 +52,6 @@ add_entrypoint_object( floor.cpp HDRS ../floor.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -74,8 +60,6 @@ add_entrypoint_object( floorf.cpp HDRS ../floorf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -84,8 +68,6 @@ add_entrypoint_object( fma.cpp HDRS ../fma.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -94,8 +76,6 @@ add_entrypoint_object( fmaf.cpp HDRS ../fmaf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -104,8 +84,6 @@ add_entrypoint_object( fmax.cpp HDRS ../fmax.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -114,8 +92,6 @@ add_entrypoint_object( fmaxf.cpp HDRS ../fmaxf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -124,8 +100,6 @@ add_entrypoint_object( fmin.cpp HDRS ../fmin.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -134,8 +108,6 @@ add_entrypoint_object( fminf.cpp HDRS ../fminf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -144,8 +116,6 @@ add_entrypoint_object( fmod.cpp HDRS ../fmod.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -154,8 +124,6 @@ add_entrypoint_object( fmodf.cpp HDRS ../fmodf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -164,8 +132,6 @@ add_entrypoint_object( nearbyint.cpp HDRS ../nearbyint.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -174,8 +140,6 @@ add_entrypoint_object( nearbyintf.cpp HDRS ../nearbyintf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -184,8 +148,6 @@ add_entrypoint_object( remainder.cpp HDRS ../remainder.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -194,8 +156,6 @@ add_entrypoint_object( remainderf.cpp HDRS ../remainderf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -204,8 +164,6 @@ add_entrypoint_object( rint.cpp HDRS ../rint.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -214,8 +172,6 @@ add_entrypoint_object( rintf.cpp HDRS ../rintf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -224,8 +180,6 @@ add_entrypoint_object( round.cpp HDRS ../round.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -234,8 +188,6 @@ add_entrypoint_object( sqrt.cpp HDRS ../sqrt.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -244,8 +196,6 @@ add_entrypoint_object( sqrtf.cpp HDRS ../sqrtf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -254,8 +204,6 @@ add_entrypoint_object( trunc.cpp HDRS ../trunc.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -264,8 +212,6 @@ add_entrypoint_object( truncf.cpp HDRS ../truncf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -274,8 +220,6 @@ add_entrypoint_object( frexp.cpp HDRS ../frexp.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -284,8 +228,6 @@ add_entrypoint_object( frexpf.cpp HDRS ../frexpf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -294,8 +236,6 @@ add_entrypoint_object( scalbn.cpp HDRS ../scalbn.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -304,8 +244,6 @@ add_entrypoint_object( scalbnf.cpp HDRS ../scalbnf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -314,8 +252,6 @@ add_entrypoint_object( ldexp.cpp HDRS ../ldexp.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -324,8 +260,6 @@ add_entrypoint_object( ldexpf.cpp HDRS ../ldexpf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -336,7 +270,6 @@ add_entrypoint_object( ../tgamma.h COMPILE_OPTIONS ${bitcode_link_flags} - -O2 ) add_entrypoint_object( @@ -347,7 +280,6 @@ add_entrypoint_object( ../tgammaf.h COMPILE_OPTIONS ${bitcode_link_flags} - -O2 ) add_entrypoint_object( @@ -358,7 +290,6 @@ add_entrypoint_object( ../lgamma.h COMPILE_OPTIONS ${bitcode_link_flags} - -O2 ) add_entrypoint_object( @@ -369,5 +300,4 @@ add_entrypoint_object( ../lgamma_r.h COMPILE_OPTIONS ${bitcode_link_flags} - -O2 ) diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 6068c36e558e..c048a64db6bc 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -2662,8 +2662,6 @@ add_entrypoint_object( ../fmaximum_mag.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -2674,8 +2672,6 @@ add_entrypoint_object( ../fmaximum_magf.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -2686,8 +2682,6 @@ add_entrypoint_object( ../fmaximum_magl.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -2735,8 +2729,6 @@ add_entrypoint_object( ../fmaximum_mag_num.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -2747,8 +2739,6 @@ add_entrypoint_object( ../fmaximum_mag_numf.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -2759,8 +2749,6 @@ add_entrypoint_object( ../fmaximum_mag_numl.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -2954,8 +2942,6 @@ add_entrypoint_object( ../fminimum_mag.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -2966,8 +2952,6 @@ add_entrypoint_object( ../fminimum_magf.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -2978,8 +2962,6 @@ add_entrypoint_object( ../fminimum_magl.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -3027,8 +3009,6 @@ add_entrypoint_object( ../fminimum_mag_num.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -3039,8 +3019,6 @@ add_entrypoint_object( ../fminimum_mag_numf.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -3051,8 +3029,6 @@ add_entrypoint_object( ../fminimum_mag_numl.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -4306,7 +4282,7 @@ add_entrypoint_object( libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization @@ -4546,8 +4522,6 @@ add_entrypoint_object( atan.cpp HDRS ../atan.h - COMPILE_OPTIONS - -O3 DEPENDS libc.src.__support.math.atan ) diff --git a/libc/src/math/nvptx/CMakeLists.txt b/libc/src/math/nvptx/CMakeLists.txt index fcb2870b4bb1..e27c316ff20c 100644 --- a/libc/src/math/nvptx/CMakeLists.txt +++ b/libc/src/math/nvptx/CMakeLists.txt @@ -4,8 +4,6 @@ add_entrypoint_object( ceil.cpp HDRS ../ceil.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -14,8 +12,6 @@ add_entrypoint_object( ceilf.cpp HDRS ../ceilf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -24,8 +20,6 @@ add_entrypoint_object( copysign.cpp HDRS ../copysign.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -34,8 +28,6 @@ add_entrypoint_object( copysignf.cpp HDRS ../copysignf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -44,8 +36,6 @@ add_entrypoint_object( fabs.cpp HDRS ../fabs.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -54,8 +44,6 @@ add_entrypoint_object( fabsf.cpp HDRS ../fabsf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -64,8 +52,6 @@ add_entrypoint_object( floor.cpp HDRS ../floor.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -74,8 +60,6 @@ add_entrypoint_object( floorf.cpp HDRS ../floorf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -84,8 +68,6 @@ add_entrypoint_object( fma.cpp HDRS ../fma.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -94,8 +76,6 @@ add_entrypoint_object( fmaf.cpp HDRS ../fmaf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -104,8 +84,6 @@ add_entrypoint_object( fmax.cpp HDRS ../fmax.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -114,8 +92,6 @@ add_entrypoint_object( fmaxf.cpp HDRS ../fmaxf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -124,8 +100,6 @@ add_entrypoint_object( fmin.cpp HDRS ../fmin.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -134,8 +108,6 @@ add_entrypoint_object( fminf.cpp HDRS ../fminf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -144,8 +116,6 @@ add_entrypoint_object( fmod.cpp HDRS ../fmod.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -154,8 +124,6 @@ add_entrypoint_object( fmodf.cpp HDRS ../fmodf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -164,8 +132,6 @@ add_entrypoint_object( nearbyint.cpp HDRS ../nearbyint.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -174,8 +140,6 @@ add_entrypoint_object( nearbyintf.cpp HDRS ../nearbyintf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -184,8 +148,6 @@ add_entrypoint_object( remainder.cpp HDRS ../remainder.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -194,8 +156,6 @@ add_entrypoint_object( remainderf.cpp HDRS ../remainderf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -204,8 +164,6 @@ add_entrypoint_object( rint.cpp HDRS ../rint.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -214,8 +172,6 @@ add_entrypoint_object( rintf.cpp HDRS ../rintf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -224,8 +180,6 @@ add_entrypoint_object( round.cpp HDRS ../round.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -234,8 +188,6 @@ add_entrypoint_object( sqrt.cpp HDRS ../sqrt.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -244,8 +196,6 @@ add_entrypoint_object( sqrtf.cpp HDRS ../sqrtf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -254,8 +204,6 @@ add_entrypoint_object( trunc.cpp HDRS ../trunc.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -264,8 +212,6 @@ add_entrypoint_object( truncf.cpp HDRS ../truncf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -276,7 +222,6 @@ add_entrypoint_object( ../tgamma.h COMPILE_OPTIONS ${bitcode_link_flags} - -O2 ) add_entrypoint_object( @@ -287,7 +232,6 @@ add_entrypoint_object( ../tgammaf.h COMPILE_OPTIONS ${bitcode_link_flags} - -O2 ) add_entrypoint_object( @@ -298,7 +242,6 @@ add_entrypoint_object( ../lgamma.h COMPILE_OPTIONS ${bitcode_link_flags} - -O2 ) add_entrypoint_object( @@ -309,5 +252,4 @@ add_entrypoint_object( ../lgamma_r.h COMPILE_OPTIONS ${bitcode_link_flags} - -O2 ) diff --git a/libc/src/time/strftime.cpp b/libc/src/time/strftime.cpp index f36091bc9736..89b7d9bb7c1b 100644 --- a/libc/src/time/strftime.cpp +++ b/libc/src/time/strftime.cpp @@ -26,7 +26,7 @@ LLVM_LIBC_FUNCTION(size_t, strftime, int ret = strftime_core::strftime_main(&writer, format, timeptr); if (buffsz > 0) // if the buffsz is 0 the buffer may be a null pointer. wb.buff[wb.buff_cur] = '\0'; - return (ret < 0 || static_cast<size_t>(ret) > buffsz) ? 0 : ret; + return (ret < 0 || static_cast<size_t>(ret) >= buffsz) ? 0 : ret; } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/time/strftime_l.cpp b/libc/src/time/strftime_l.cpp index 201b85da39ee..409f8683b728 100644 --- a/libc/src/time/strftime_l.cpp +++ b/libc/src/time/strftime_l.cpp @@ -29,7 +29,7 @@ LLVM_LIBC_FUNCTION(size_t, strftime_l, int ret = strftime_core::strftime_main(&writer, format, timeptr); if (buffsz > 0) // if the buffsz is 0 the buffer may be a null pointer. wb.buff[wb.buff_cur] = '\0'; - return (ret < 0 || static_cast<size_t>(ret) > buffsz) ? 0 : ret; + return (ret < 0 || static_cast<size_t>(ret) >= buffsz) ? 0 : ret; } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/src/time/strftime_test.cpp b/libc/test/src/time/strftime_test.cpp index cac7560b2b94..522215279190 100644 --- a/libc/test/src/time/strftime_test.cpp +++ b/libc/test/src/time/strftime_test.cpp @@ -2326,3 +2326,24 @@ TEST(LlvmLibcStrftimeTest, TimeFormatFullDateTime) { // size_t written = 0; // SimplePaddedNum spn; // } + +TEST(LlvmLibcStrftimeTest, BufferTooSmall) { + struct tm time; + char tiny_buffer[1]; + + time.tm_year = get_adjusted_year(2025); + time.tm_mon = 10; + time.tm_mday = 24; + + size_t written = + LIBC_NAMESPACE::strftime(tiny_buffer, sizeof(tiny_buffer), "%F", &time); + EXPECT_EQ(written, size_t{0}); + + char small_buffer[10]; + + // The string "2025-11-24" is 10 chars, + // so strftime needs 10 + 1 bytes to write the string and the null terminator. + written = + LIBC_NAMESPACE::strftime(small_buffer, sizeof(small_buffer), "%F", &time); + EXPECT_EQ(written, size_t{0}); +} diff --git a/libc/utils/hdrgen/hdrgen/enumeration.py b/libc/utils/hdrgen/hdrgen/enumeration.py index 198720826720..1e0f64aec1ed 100644 --- a/libc/utils/hdrgen/hdrgen/enumeration.py +++ b/libc/utils/hdrgen/hdrgen/enumeration.py @@ -6,24 +6,14 @@ # # ==-------------------------------------------------------------------------==# -from functools import total_ordering +from hdrgen.symbol import Symbol -@total_ordering -class Enumeration: +class Enumeration(Symbol): def __init__(self, name, value): - self.name = name + super().__init__(name) self.value = value - def __eq__(self, other): - return self.name == other.name - - def __lt__(self, other): - return self.name < other.name - - def __hash__(self): - return self.name.__hash__() - def __str__(self): if self.value != None: return f"{self.name} = {self.value}" diff --git a/libc/utils/hdrgen/hdrgen/function.py b/libc/utils/hdrgen/hdrgen/function.py index f039996584e3..4de3406cc408 100644 --- a/libc/utils/hdrgen/hdrgen/function.py +++ b/libc/utils/hdrgen/hdrgen/function.py @@ -7,7 +7,7 @@ # ==-------------------------------------------------------------------------==# import re -from functools import total_ordering +from hdrgen.symbol import Symbol from hdrgen.type import Type @@ -37,14 +37,13 @@ KEYWORDS = [ NONIDENTIFIER = re.compile("[^a-zA-Z0-9_]+") -@total_ordering -class Function: +class Function(Symbol): def __init__( self, return_type, name, arguments, standards, guard=None, attributes=[] ): + super().__init__(name) assert return_type self.return_type = return_type - self.name = name self.arguments = [ arg if isinstance(arg, str) else arg["type"] for arg in arguments ] @@ -53,15 +52,6 @@ class Function: self.guard = guard self.attributes = attributes or [] - def __eq__(self, other): - return self.name == other.name - - def __lt__(self, other): - return self.name < other.name - - def __hash__(self): - return self.name.__hash__() - def signature_types(self): def collapse(type_string): assert type_string diff --git a/libc/utils/hdrgen/hdrgen/header.py b/libc/utils/hdrgen/hdrgen/header.py index 715d4b7c9b7e..f592327f06ad 100644 --- a/libc/utils/hdrgen/hdrgen/header.py +++ b/libc/utils/hdrgen/hdrgen/header.py @@ -35,6 +35,13 @@ NONIDENTIFIER = re.compile("[^a-zA-Z0-9_]+") COMMON_HEADER = PurePosixPath("__llvm-libc-common.h") +# These "attributes" are known macros defined in COMMON_HEADER. +# Others are found in "llvm-libc-macros/{name}.h". +COMMON_ATTRIBUTES = { + "_Noreturn", + "_Returns_twice", +} + # All the canonical identifiers are in lowercase for easy maintenance. # This maps them to the pretty descriptions to generate in header comments. LIBRARY_DESCRIPTIONS = { @@ -50,9 +57,7 @@ LIBRARY_DESCRIPTIONS = { HEADER_TEMPLATE = """\ //===-- {library} header <{header}> --===// // -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +{license_lines} // //===---------------------------------------------------------------------===// @@ -64,6 +69,12 @@ HEADER_TEMPLATE = """\ #endif // {guard} """ +LLVM_LICENSE_TEXT = [ + "Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.", + "See https://llvm.org/LICENSE.txt for license information.", + "SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception", +] + class HeaderFile: def __init__(self, name): @@ -74,8 +85,10 @@ class HeaderFile: self.enumerations = [] self.objects = [] self.functions = [] + self.extra_standards = {} self.standards = [] self.merge_yaml_files = [] + self.license_text = [] def add_macro(self, macro): self.macros.append(macro) @@ -98,6 +111,11 @@ class HeaderFile: self.enumerations = sorted(set(self.enumerations) | set(other.enumerations)) self.objects = sorted(set(self.objects) | set(other.objects)) self.functions = sorted(set(self.functions) | set(other.functions)) + self.extra_standards |= other.extra_standards + if self.license_text: + assert not other.license_text, "only one `license_text` allowed" + else: + self.license_text = other.license_text def all_types(self): return reduce( @@ -106,6 +124,13 @@ class HeaderFile: set(self.types), ) + def all_attributes(self): + return reduce( + lambda a, b: a | b, + [set(f.attributes) for f in self.functions], + set(), + ) + def all_standards(self): # FIXME: Only functions have the "standard" field, but all the entity # types should have one too. @@ -114,16 +139,24 @@ class HeaderFile: ) def includes(self): - return { - PurePosixPath("llvm-libc-macros") / macro.header - for macro in self.macros - if macro.header is not None - } | { - COMPILER_HEADER_TYPES.get( - typ.type_name, PurePosixPath("llvm-libc-types") / f"{typ.type_name}.h" - ) - for typ in self.all_types() - } + return ( + { + PurePosixPath("llvm-libc-macros") / macro.header + for macro in self.macros + if macro.header is not None + } + | { + COMPILER_HEADER_TYPES.get( + typ.name, + PurePosixPath("llvm-libc-types") / f"{typ.name}.h", + ) + for typ in self.all_types() + } + | { + PurePosixPath("llvm-libc-macros") / f"{attr}.h" + for attr in self.all_attributes() - COMMON_ATTRIBUTES + } + ) def header_guard(self): return "_LLVM_LIBC_" + "_".join( @@ -131,24 +164,29 @@ class HeaderFile: ) def library_description(self): + descriptions = LIBRARY_DESCRIPTIONS | self.extra_standards # If the header itself is in standard C, just call it that. if "stdc" in self.standards: - return LIBRARY_DESCRIPTIONS["stdc"] + return descriptions["stdc"] # If the header itself is in POSIX, just call it that. if "posix" in self.standards: - return LIBRARY_DESCRIPTIONS["posix"] + return descriptions["posix"] # Otherwise, consider the standards for each symbol as well. standards = self.all_standards() # Otherwise, it's described by all those that apply, but ignoring # "stdc" and "posix" since this is not a "stdc" or "posix" header. return " / ".join( sorted( - LIBRARY_DESCRIPTIONS[standard] + descriptions[standard] for standard in standards if standard not in {"stdc", "posix"} ) ) + def license_lines(self): + lines = self.license_text or LLVM_LICENSE_TEXT + return "\n".join([f"// {line}" for line in lines]) + def template(self, dir, files_read): if self.template_file is not None: # There's a custom template file, so just read it in and record @@ -162,6 +200,7 @@ class HeaderFile: library=self.library_description(), header=self.name, guard=self.header_guard(), + license_lines=self.license_lines(), ) def public_api(self): @@ -188,7 +227,7 @@ class HeaderFile: ) ] - for macro in self.macros: + for macro in sorted(self.macros): # When there is nothing to define, the Macro object converts to str # as an empty string. Don't emit a blank line for those cases. if str(macro): @@ -203,7 +242,12 @@ class HeaderFile: content.append("\n__BEGIN_C_DECLS\n") current_guard = None - for function in self.functions: + last_name = None + for function in sorted(self.functions): + # If the last function's name was the same after underscores, + # elide the blank line between the declarations. + if last_name == function.name_without_underscores(): + content.pop() if function.guard == None and current_guard == None: content.append(str(function) + " __NOEXCEPT;") content.append("") @@ -225,6 +269,7 @@ class HeaderFile: content.append(f"#ifdef {current_guard}") content.append(str(function) + " __NOEXCEPT;") content.append("") + last_name = function.name_without_underscores() if current_guard != None: content.pop() content.append(f"#endif // {current_guard}") diff --git a/libc/utils/hdrgen/hdrgen/macro.py b/libc/utils/hdrgen/hdrgen/macro.py index e42e82845694..4664d9fb0049 100644 --- a/libc/utils/hdrgen/hdrgen/macro.py +++ b/libc/utils/hdrgen/hdrgen/macro.py @@ -6,25 +6,15 @@ # # ==-------------------------------------------------------------------------==# -from functools import total_ordering +from hdrgen.symbol import Symbol -@total_ordering -class Macro: +class Macro(Symbol): def __init__(self, name, value=None, header=None): - self.name = name + super().__init__(name) self.value = value self.header = header - def __eq__(self, other): - return self.name == other.name - - def __lt__(self, other): - return self.name < other.name - - def __hash__(self): - return self.name.__hash__() - def __str__(self): if self.header != None: return "" diff --git a/libc/utils/hdrgen/hdrgen/main.py b/libc/utils/hdrgen/hdrgen/main.py index 25df41e506a1..c12e89ef771d 100755 --- a/libc/utils/hdrgen/hdrgen/main.py +++ b/libc/utils/hdrgen/hdrgen/main.py @@ -105,6 +105,7 @@ def main(): return 2 header.merge(merge_from_header) + assert header.name, f"`header: name.h` line is required in {yaml_file}" return header if args.json: diff --git a/libc/utils/hdrgen/hdrgen/object.py b/libc/utils/hdrgen/hdrgen/object.py index a311c37168d6..a2ab496bed01 100644 --- a/libc/utils/hdrgen/hdrgen/object.py +++ b/libc/utils/hdrgen/hdrgen/object.py @@ -6,23 +6,13 @@ # # ==-------------------------------------------------------------------------==# -from functools import total_ordering +from hdrgen.symbol import Symbol -@total_ordering -class Object: +class Object(Symbol): def __init__(self, name, type): - self.name = name + super().__init__(name) self.type = type - def __eq__(self, other): - return self.name == other.name - - def __lt__(self, other): - return self.name < other.name - - def __hash__(self): - return self.name.__hash__() - def __str__(self): return f"extern {self.type} {self.name};" diff --git a/libc/utils/hdrgen/hdrgen/symbol.py b/libc/utils/hdrgen/hdrgen/symbol.py new file mode 100644 index 000000000000..28e9def128e4 --- /dev/null +++ b/libc/utils/hdrgen/hdrgen/symbol.py @@ -0,0 +1,41 @@ +# ====-- Symbol class for libc function headers----------------*- python -*--==# +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ==-------------------------------------------------------------------------==# + +from functools import total_ordering + + +@total_ordering +class Symbol: + """ + Symbol is the common superclass for each kind of entity named by an + identifier. It provides the name field, and defines sort ordering, + hashing, and equality based only on the name. The sorting is pretty + presentation order for identifiers, which is to say it first sorts + lexically but ignores leading underscores and secondarily sorts with the + fewest underscores first. + """ + + def __init__(self, name): + assert name + self.name = name + + def __eq__(self, other): + return self.name == other.name + + def __hash__(self): + return self.name.__hash__() + + def name_without_underscores(self): + return self.name.lstrip("_") + + def name_sort_key(self): + ident = self.name_without_underscores() + return ident, len(self.name) - len(ident) + + def __lt__(self, other): + return self.name_sort_key() < other.name_sort_key() diff --git a/libc/utils/hdrgen/hdrgen/type.py b/libc/utils/hdrgen/hdrgen/type.py index 0c0af8569c61..20c1881a9379 100644 --- a/libc/utils/hdrgen/hdrgen/type.py +++ b/libc/utils/hdrgen/hdrgen/type.py @@ -6,20 +6,10 @@ # # ==-------------------------------------------------------------------------==# -from functools import total_ordering +from hdrgen.symbol import Symbol -@total_ordering -class Type: - def __init__(self, type_name): - assert type_name - self.type_name = type_name - - def __eq__(self, other): - return self.type_name == other.type_name - - def __lt__(self, other): - return self.type_name < other.type_name - - def __hash__(self): - return self.type_name.__hash__() +class Type(Symbol): + # A type so far carries no specific information beyond its name. + def __init__(self, name): + super().__init__(name) diff --git a/libc/utils/hdrgen/hdrgen/yaml_to_classes.py b/libc/utils/hdrgen/hdrgen/yaml_to_classes.py index ebe7781d449f..9eddbe615cbb 100644 --- a/libc/utils/hdrgen/hdrgen/yaml_to_classes.py +++ b/libc/utils/hdrgen/hdrgen/yaml_to_classes.py @@ -37,6 +37,8 @@ def yaml_to_classes(yaml_data, header_class, entry_points=None): header = header_class(header_name) header.template_file = yaml_data.get("header_template") header.standards = yaml_data.get("standards", []) + header.extra_standards = yaml_data.get("extra_standards", {}) + header.license_text = yaml_data.get("license_text", []) header.merge_yaml_files = yaml_data.get("merge_yaml_files", []) for macro_data in yaml_data.get("macros", []): diff --git a/libc/utils/hdrgen/tests/expected_output/custom.h b/libc/utils/hdrgen/tests/expected_output/custom.h new file mode 100644 index 000000000000..5f9ed231490f --- /dev/null +++ b/libc/utils/hdrgen/tests/expected_output/custom.h @@ -0,0 +1,21 @@ +//===-- Wile E. Coyote header <custom.h> --===// +// +// Caveat emptor. +// I never studied law. +// +//===---------------------------------------------------------------------===// + +#ifndef _LLVM_LIBC_CUSTOM_H +#define _LLVM_LIBC_CUSTOM_H + +#include "__llvm-libc-common.h" +#include "llvm-libc-types/meep.h" +#include "llvm-libc-types/road.h" + +__BEGIN_C_DECLS + +road runner(meep, meep) __NOEXCEPT; + +__END_C_DECLS + +#endif // _LLVM_LIBC_CUSTOM_H diff --git a/libc/utils/hdrgen/tests/expected_output/sorting.h b/libc/utils/hdrgen/tests/expected_output/sorting.h new file mode 100644 index 000000000000..a091a421b2c3 --- /dev/null +++ b/libc/utils/hdrgen/tests/expected_output/sorting.h @@ -0,0 +1,24 @@ +//===-- Standard C header <sorting.h> --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef _LLVM_LIBC_SORTING_H +#define _LLVM_LIBC_SORTING_H + +#include "__llvm-libc-common.h" + +__BEGIN_C_DECLS + +void func_with_aliases(int) __NOEXCEPT; +void _func_with_aliases(int) __NOEXCEPT; +void __func_with_aliases(int) __NOEXCEPT; + +void gunk(const char *) __NOEXCEPT; + +__END_C_DECLS + +#endif // _LLVM_LIBC_SORTING_H diff --git a/libc/utils/hdrgen/tests/expected_output/test_header.h b/libc/utils/hdrgen/tests/expected_output/test_header.h index 748c09808c12..49112a353f7b 100644 --- a/libc/utils/hdrgen/tests/expected_output/test_header.h +++ b/libc/utils/hdrgen/tests/expected_output/test_header.h @@ -12,6 +12,7 @@ #include "__llvm-libc-common.h" #include "llvm-libc-macros/float16-macros.h" +#include "llvm-libc-macros/CONST_FUNC_A.h" #include "llvm-libc-macros/test_more-macros.h" #include "llvm-libc-macros/test_small-macros.h" #include "llvm-libc-types/float128.h" diff --git a/libc/utils/hdrgen/tests/expected_output/test_small.json b/libc/utils/hdrgen/tests/expected_output/test_small.json index 9cc73d013a67..8502df23b9a4 100644 --- a/libc/utils/hdrgen/tests/expected_output/test_small.json +++ b/libc/utils/hdrgen/tests/expected_output/test_small.json @@ -4,6 +4,7 @@ "standards": [], "includes": [ "__llvm-libc-common.h", + "llvm-libc-macros/CONST_FUNC_A.h", "llvm-libc-macros/test_more-macros.h", "llvm-libc-macros/test_small-macros.h", "llvm-libc-types/float128.h", diff --git a/libc/utils/hdrgen/tests/input/custom-common.yaml b/libc/utils/hdrgen/tests/input/custom-common.yaml new file mode 100644 index 000000000000..909a3ba5163a --- /dev/null +++ b/libc/utils/hdrgen/tests/input/custom-common.yaml @@ -0,0 +1,6 @@ +license_text: + - Caveat emptor. + - I never studied law. + +extra_standards: + acme: Wile E. Coyote diff --git a/libc/utils/hdrgen/tests/input/custom.yaml b/libc/utils/hdrgen/tests/input/custom.yaml new file mode 100644 index 000000000000..7d3ff8ec421d --- /dev/null +++ b/libc/utils/hdrgen/tests/input/custom.yaml @@ -0,0 +1,13 @@ +merge_yaml_files: + - custom-common.yaml + +header: custom.h +standards: + - acme + +functions: + - name: runner + return_type: road + arguments: + - type: meep + - type: meep diff --git a/libc/utils/hdrgen/tests/input/sorting.yaml b/libc/utils/hdrgen/tests/input/sorting.yaml new file mode 100644 index 000000000000..3c26cde9e6c4 --- /dev/null +++ b/libc/utils/hdrgen/tests/input/sorting.yaml @@ -0,0 +1,20 @@ +header: sorting.h +standards: + - stdc +functions: + - name: gunk + return_type: void + arguments: + - type: const char * + - name: _func_with_aliases + return_type: void + arguments: + - type: int + - name: func_with_aliases + return_type: void + arguments: + - type: int + - name: __func_with_aliases + return_type: void + arguments: + - type: int diff --git a/libc/utils/hdrgen/tests/test_integration.py b/libc/utils/hdrgen/tests/test_integration.py index bf393d26a810..b975d8ff007b 100644 --- a/libc/utils/hdrgen/tests/test_integration.py +++ b/libc/utils/hdrgen/tests/test_integration.py @@ -59,6 +59,13 @@ class TestHeaderGenIntegration(unittest.TestCase): self.run_script(yaml_file, output_file) self.compare_files(output_file, expected_output_file) + def test_custom_license_and_standards(self): + yaml_file = self.source_dir / "input" / "custom.yaml" + expected_output_file = self.source_dir / "expected_output" / "custom.h" + output_file = self.output_dir / "custom.h" + self.run_script(yaml_file, output_file) + self.compare_files(output_file, expected_output_file) + def test_generate_json(self): yaml_file = self.source_dir / "input/test_small.yaml" expected_output_file = self.source_dir / "expected_output/test_small.json" @@ -68,6 +75,13 @@ class TestHeaderGenIntegration(unittest.TestCase): self.compare_files(output_file, expected_output_file) + def test_sorting(self): + yaml_file = self.source_dir / "input" / "sorting.yaml" + expected_output_file = self.source_dir / "expected_output" / "sorting.h" + output_file = self.output_dir / "sorting.h" + self.run_script(yaml_file, output_file) + self.compare_files(output_file, expected_output_file) + def main(): parser = argparse.ArgumentParser(description="TestHeaderGenIntegration arguments") diff --git a/libcxx/test/libcxx/input.output/iostreams.base/ios.base/ios.base.cons/dtor.uninitialized.pass.cpp b/libcxx/test/libcxx/input.output/iostreams.base/ios.base/ios.base.cons/dtor.uninitialized.pass.cpp index f17c1483c4a9..16d66e3be14e 100644 --- a/libcxx/test/libcxx/input.output/iostreams.base/ios.base/ios.base.cons/dtor.uninitialized.pass.cpp +++ b/libcxx/test/libcxx/input.output/iostreams.base/ios.base/ios.base.cons/dtor.uninitialized.pass.cpp @@ -6,14 +6,12 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // UNSUPPORTED: no-exceptions // The fix for issue 57964 requires an updated dylib due to explicit // instantiations. That means Apple backdeployment targets remain broken. -// XFAIL: using-built-library-before-llvm-19 +// TODO: Remove && !darwin once availability markup for LLVM 19 on macOS has been added +// XFAIL: using-built-library-before-llvm-19 && !darwin // <ios> diff --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/setbuf.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/setbuf.pass.cpp index 9d14abcedd42..00aa97a45cc2 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/setbuf.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/setbuf.pass.cpp @@ -6,16 +6,14 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // <fstream> // basic_streambuf<charT, traits>* setbuf(char_type* s, streamsize n) override; // This test requires the fix to https://llvm.org/PR60509 in the dylib, // which landed in 5afb937d8a30445642ccaf33866ee4cdd0713222. -// XFAIL: using-built-library-before-llvm-19 +// TODO: Remove && !darwin once availability markup for LLVM 19 on macOS has been added +// XFAIL: using-built-library-before-llvm-19 && !darwin #include <fstream> #include <cstddef> diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/sync.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/sync.pass.cpp index 3b685950d36a..b04d2c07ebb1 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/sync.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/sync.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // <istream> // int sync(); @@ -16,7 +13,8 @@ // The fix for bug 51497 and bug 51499 require and updated dylib due to // explicit instantiations. That means Apple backdeployment targets remain // broken. -// XFAIL: using-built-library-before-llvm-19 +// TODO: Remove && !darwin once availability markup for LLVM 19 on macOS has been added +// XFAIL: using-built-library-before-llvm-19 && !darwin #include <istream> #include <cassert> diff --git a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp index 4905ed40f4a2..8ae6bc2d3ba6 100644 --- a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // Bionic has minimal locale support, investigate this later. // XFAIL: LIBCXX-ANDROID-FIXME @@ -56,14 +53,7 @@ int main(int, char**) ASSERT_COMPARE(std::string, "AAA", "BBB", -1); ASSERT_COMPARE(std::string, "bbb", "aaa", 1); ASSERT_COMPARE(std::string, "ccc", "ccc", 0); - -#if defined(__APPLE__) - // Apple's default collation is case-sensitive - ASSERT_COMPARE(std::string, "aaaaaaA", "BaaaaaA", 1); -#else - // Glibc, Windows, and FreeBSD's default collation is case-insensitive ASSERT_COMPARE(std::string, "aaaaaaA", "BaaaaaA", -1); -#endif } #ifndef TEST_HAS_NO_WIDE_CHARACTERS { @@ -73,13 +63,7 @@ int main(int, char**) ASSERT_COMPARE(std::wstring, L"AAA", L"BBB", -1); ASSERT_COMPARE(std::wstring, L"bbb", L"aaa", 1); ASSERT_COMPARE(std::wstring, L"ccc", L"ccc", 0); -#if defined(__APPLE__) - // Apple's default collation is case-sensitive - ASSERT_COMPARE(std::wstring, L"aaaaaaA", L"BaaaaaA", 1); -#else - // Glibc, Windows, and FreeBSD's default collation is case-insensitive ASSERT_COMPARE(std::wstring, L"aaaaaaA", L"BaaaaaA", -1); -#endif } #endif } diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp index ea6b07934510..c9ed59f3cb9a 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp @@ -6,11 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - -// XFAIL: darwin - // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp index f98758d086de..371cf0e90c8d 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp index 6980b7ae77db..c86df7e6b53b 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd @@ -158,7 +155,7 @@ int main(int, char**) std::noshowbase(ios); } { // negative one, showbase -#ifdef _AIX +#if defined(_AIX) || defined(__APPLE__) std::string v = "-" + currency_symbol + "0.01"; #else std::string v = currency_symbol + "-0.01"; @@ -172,7 +169,7 @@ int main(int, char**) assert(ex == -1); } { // negative one, showbase -#ifdef _AIX +#if defined(_AIX) || defined(__APPLE__) std::string v = "-" + currency_symbol + "0.01"; #else std::string v = currency_symbol + "-0.01"; @@ -212,7 +209,7 @@ int main(int, char**) std::noshowbase(ios); } { // negative, showbase -#ifdef _AIX +#if defined(_AIX) || defined(__APPLE__) std::string v = "-" + currency_symbol + "1,234,567.89"; #else std::string v = currency_symbol + "-1,234,567.89"; @@ -333,7 +330,7 @@ int main(int, char**) std::noshowbase(ios); } { // negative one, showbase -#if defined(TEST_HAS_GLIBC) || defined(_AIX) +#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) std::string v = "-" + currency_name + "0.01"; #else std::string v = currency_name + "-0.01"; @@ -348,7 +345,7 @@ int main(int, char**) assert(ex == -1); } { // negative one, showbase -#if defined(TEST_HAS_GLIBC) || defined(_AIX) +#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) std::string v = "-" + currency_name + "0.01"; #else std::string v = currency_name + "-0.01"; @@ -389,7 +386,7 @@ int main(int, char**) std::noshowbase(ios); } { // negative, showbase -#if defined(TEST_HAS_GLIBC) || defined(_AIX) +#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) std::string v = "-" + currency_name + "1,234,567.89"; #else std::string v = currency_name + "-1,234,567.89"; @@ -518,7 +515,7 @@ int main(int, char**) std::noshowbase(ios); } { // negative one, showbase -# ifdef _AIX +# if defined(_AIX) || defined(__APPLE__) std::wstring v = L"-" + w_currency_symbol + L"0.01"; # else std::wstring v = w_currency_symbol + L"-0.01"; @@ -532,7 +529,7 @@ int main(int, char**) assert(ex == -1); } { // negative one, showbase -# ifdef _AIX +# if defined(_AIX) || defined(__APPLE__) std::wstring v = L"-" + w_currency_symbol + L"0.01"; # else std::wstring v = w_currency_symbol + L"-0.01"; @@ -572,7 +569,7 @@ int main(int, char**) std::noshowbase(ios); } { // negative, showbase -# ifdef _AIX +# if defined(_AIX) || defined(__APPLE__) std::wstring v = L"-" + w_currency_symbol + L"1,234,567.89"; # else std::wstring v = w_currency_symbol + L"-1,234,567.89"; @@ -693,7 +690,7 @@ int main(int, char**) std::noshowbase(ios); } { // negative one, showbase -# if defined(TEST_HAS_GLIBC) || defined(_AIX) +# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) std::wstring v = L"-" + w_currency_name + L"0.01"; # else std::wstring v = w_currency_name + L"-0.01"; @@ -707,7 +704,7 @@ int main(int, char**) assert(ex == -1); } { // negative one, showbase -# if defined(TEST_HAS_GLIBC) || defined(_AIX) +# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) std::wstring v = L"-" + w_currency_name + L"0.01"; # else std::wstring v = w_currency_name + L"-0.01"; @@ -747,7 +744,7 @@ int main(int, char**) std::noshowbase(ios); } { // negative, showbase -# if defined(TEST_HAS_GLIBC) || defined(_AIX) +# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) std::wstring v = L"-" + w_currency_name + L"1,234,567.89"; # else std::wstring v = w_currency_name + L"-1,234,567.89"; diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp index 14745996b9fd..f9d7998b07ff 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp @@ -6,11 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - -// XFAIL: darwin - // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp index 0455e5949c44..be1e39748846 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp index 68640fabb73b..25046a841708 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd @@ -122,7 +119,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), false, ios, '*', v); std::string ex(str, base(iter)); -#ifdef _AIX +#if defined(_AIX) || defined(__APPLE__) assert(ex == "-" + currency_symbol + "0.01"); #else assert(ex == currency_symbol + "-0.01"); @@ -142,7 +139,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), false, ios, '*', v); std::string ex(str, base(iter)); -#ifdef _AIX +#if defined(_AIX) || defined(__APPLE__) assert(ex == "-" + currency_symbol + "1,234,567.89"); #else assert(ex == currency_symbol + "-1,234,567.89"); @@ -156,7 +153,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), false, ios, ' ', v); std::string ex(str, base(iter)); -#ifdef _AIX +#if defined(_AIX) || defined(__APPLE__) assert(ex == "-" + currency_symbol + "1,234,567.89" + currency_symbol_padding); #else assert(ex == currency_symbol + "-1,234,567.89" + currency_symbol_padding); @@ -171,7 +168,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), false, ios, ' ', v); std::string ex(str, base(iter)); -#ifdef _AIX +#if defined(_AIX) || defined(__APPLE__) assert(ex == "-" + currency_symbol + currency_symbol_padding + "1,234,567.89"); #else assert(ex == currency_symbol + "-" + currency_symbol_padding + "1,234,567.89"); @@ -186,7 +183,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), false, ios, ' ', v); std::string ex(str, base(iter)); -#ifdef _AIX +#if defined(_AIX) || defined(__APPLE__) assert(ex == currency_symbol_padding + "-" + currency_symbol + "1,234,567.89"); #else assert(ex == currency_symbol_padding + currency_symbol + "-1,234,567.89"); @@ -239,7 +236,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), true, ios, '*', v); std::string ex(str, base(iter)); -#if defined(TEST_HAS_GLIBC) || defined(_AIX) +#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) assert(ex == "-" + currency_name + "0.01"); #else assert(ex == currency_name + "-0.01"); @@ -259,7 +256,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), true, ios, '*', v); std::string ex(str, base(iter)); -#if defined(TEST_HAS_GLIBC) || defined(_AIX) +#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) assert(ex == "-" + currency_name + "1,234,567.89"); #else assert(ex == currency_name + "-1,234,567.89"); @@ -273,7 +270,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), true, ios, ' ', v); std::string ex(str, base(iter)); -#if defined(TEST_HAS_GLIBC) || defined(_AIX) +#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) assert(ex == "-" + currency_name + "1,234,567.89" + currency_name_padding); #else assert(ex == currency_name + "-1,234,567.89" + currency_name_padding); @@ -288,7 +285,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), true, ios, ' ', v); std::string ex(str, base(iter)); -#if defined(TEST_HAS_GLIBC) || defined(_AIX) +#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) assert(ex == "-" + currency_name + currency_name_padding + "1,234,567.89"); #else assert(ex == currency_name + "-" + currency_name_padding + "1,234,567.89"); @@ -303,7 +300,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), true, ios, ' ', v); std::string ex(str, base(iter)); -#if defined(TEST_HAS_GLIBC) || defined(_AIX) +#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) assert(ex == currency_name_padding + "-" + currency_name + "1,234,567.89"); #else assert(ex == currency_name_padding + currency_name + "-1,234,567.89"); @@ -366,7 +363,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator<wchar_t*> iter = f.put(cpp17_output_iterator<wchar_t*>(str), false, ios, '*', v); std::wstring ex(str, base(iter)); -# ifdef _AIX +# if defined(_AIX) || defined(__APPLE__) assert(ex == L"-" + currency_symbol + L"0.01"); # else assert(ex == currency_symbol + L"-0.01"); @@ -386,7 +383,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator<wchar_t*> iter = f.put(cpp17_output_iterator<wchar_t*>(str), false, ios, '*', v); std::wstring ex(str, base(iter)); -# ifdef _AIX +# if defined(_AIX) || defined(__APPLE__) assert(ex == L"-" + currency_symbol + L"1,234,567.89"); # else assert(ex == currency_symbol + L"-1,234,567.89"); @@ -400,7 +397,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator<wchar_t*> iter = f.put(cpp17_output_iterator<wchar_t*>(str), false, ios, ' ', v); std::wstring ex(str, base(iter)); -# ifdef _AIX +# if defined(_AIX) || defined(__APPLE__) assert(ex == L"-" + currency_symbol + L"1,234,567.89 "); # else assert(ex == currency_symbol + L"-1,234,567.89 "); @@ -415,7 +412,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator<wchar_t*> iter = f.put(cpp17_output_iterator<wchar_t*>(str), false, ios, ' ', v); std::wstring ex(str, base(iter)); -# ifdef _AIX +# if defined(_AIX) || defined(__APPLE__) assert(ex == L"-" + currency_symbol + L" 1,234,567.89"); # else assert(ex == currency_symbol + L"- 1,234,567.89"); @@ -430,7 +427,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator<wchar_t*> iter = f.put(cpp17_output_iterator<wchar_t*>(str), false, ios, ' ', v); std::wstring ex(str, base(iter)); -# ifdef _AIX +# if defined(_AIX) || defined(__APPLE__) assert(ex == L" -" + currency_symbol + L"1,234,567.89"); # else assert(ex == L" " + currency_symbol + L"-1,234,567.89"); @@ -483,7 +480,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator<wchar_t*> iter = f.put(cpp17_output_iterator<wchar_t*>(str), true, ios, '*', v); std::wstring ex(str, base(iter)); -# if defined(TEST_HAS_GLIBC) || defined(_AIX) +# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) assert(ex == L"-" + currency_name + L"0.01"); #else assert(ex == currency_name + L"-0.01"); @@ -503,7 +500,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator<wchar_t*> iter = f.put(cpp17_output_iterator<wchar_t*>(str), true, ios, '*', v); std::wstring ex(str, base(iter)); -# if defined(TEST_HAS_GLIBC) || defined(_AIX) +# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) assert(ex == L"-" + currency_name + L"1,234,567.89"); #else assert(ex == currency_name + L"-1,234,567.89"); @@ -517,7 +514,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator<wchar_t*> iter = f.put(cpp17_output_iterator<wchar_t*>(str), true, ios, ' ', v); std::wstring ex(str, base(iter)); -# if defined(TEST_HAS_GLIBC) || defined(_AIX) +# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) assert(ex == L"-" + currency_name + L"1,234,567.89" + currency_name_padding); #else assert(ex == currency_name + L"-1,234,567.89" + currency_name_padding); @@ -532,7 +529,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator<wchar_t*> iter = f.put(cpp17_output_iterator<wchar_t*>(str), true, ios, ' ', v); std::wstring ex(str, base(iter)); -# if defined(TEST_HAS_GLIBC) || defined(_AIX) +# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) assert(ex == L"-" + currency_name + currency_name_padding + L"1,234,567.89"); #else assert(ex == currency_name + L"-" + currency_name_padding + L"1,234,567.89"); @@ -547,7 +544,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator<wchar_t*> iter = f.put(cpp17_output_iterator<wchar_t*>(str), true, ios, ' ', v); std::wstring ex(str, base(iter)); -# if defined(TEST_HAS_GLIBC) || defined(_AIX) +# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) assert(ex == currency_name_padding + L"-" + currency_name + L"1,234,567.89"); #else assert(ex == currency_name_padding + currency_name + L"-1,234,567.89"); diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/curr_symbol.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/curr_symbol.pass.cpp index 9c1253d47acd..e7f0f29e8774 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/curr_symbol.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/curr_symbol.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd @@ -117,11 +114,7 @@ int main(int, char**) { Fnf f(LOCALE_fr_FR_UTF_8, 1); -#ifdef __APPLE__ - assert(f.curr_symbol() == " Eu"); -#else assert(f.curr_symbol() == " \u20ac"); -#endif } { Fnt f(LOCALE_fr_FR_UTF_8, 1); @@ -130,11 +123,7 @@ int main(int, char**) #ifndef TEST_HAS_NO_WIDE_CHARACTERS { Fwf f(LOCALE_fr_FR_UTF_8, 1); -#ifdef __APPLE__ - assert(f.curr_symbol() == L" Eu"); -#else assert(f.curr_symbol() == L" \u20ac"); -#endif } { Fwt f(LOCALE_fr_FR_UTF_8, 1); @@ -164,7 +153,7 @@ int main(int, char**) { Fnf f(LOCALE_zh_CN_UTF_8, 1); -#ifdef _WIN32 +#if defined(_WIN32) || defined(__APPLE__) assert(f.curr_symbol() == "\xC2\xA5"); // \u00A5 #else assert(f.curr_symbol() == "\xEF\xBF\xA5"); // \uFFE5 @@ -177,7 +166,7 @@ int main(int, char**) #ifndef TEST_HAS_NO_WIDE_CHARACTERS { Fwf f(LOCALE_zh_CN_UTF_8, 1); -#ifdef _WIN32 +#if defined(_WIN32) || defined(__APPLE__) assert(f.curr_symbol() == L"\u00A5"); #else assert(f.curr_symbol() == L"\uFFE5"); diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/grouping.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/grouping.pass.cpp index 630b2739c88a..90dc6c4d7a2a 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/grouping.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/grouping.pass.cpp @@ -6,11 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - -// XFAIL: darwin -// // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/neg_format.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/neg_format.pass.cpp index a3e3d853524b..e9528147dfe6 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/neg_format.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/neg_format.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd @@ -82,14 +79,6 @@ void assert_sign_symbol_none_value(std::money_base::pattern p) assert(p.field[3] == std::money_base::value); } -void assert_value_none_symbol_sign(std::money_base::pattern p) -{ - assert(p.field[0] == std::money_base::value); - assert(p.field[1] == std::money_base::none); - assert(p.field[2] == std::money_base::symbol); - assert(p.field[3] == std::money_base::sign); -} - void assert_sign_value_none_symbol(std::money_base::pattern p) { assert(p.field[0] == std::money_base::sign); @@ -149,39 +138,23 @@ int main(int, char**) { Fnf f(LOCALE_fr_FR_UTF_8, 1); std::money_base::pattern p = f.neg_format(); -#ifdef __APPLE__ - assert_value_none_symbol_sign(p); -#else assert_sign_value_none_symbol(p); -#endif } { Fnt f(LOCALE_fr_FR_UTF_8, 1); std::money_base::pattern p = f.neg_format(); -#ifdef __APPLE__ - assert_value_none_symbol_sign(p); -#else assert_sign_value_none_symbol(p); -#endif } #ifndef TEST_HAS_NO_WIDE_CHARACTERS { Fwf f(LOCALE_fr_FR_UTF_8, 1); std::money_base::pattern p = f.neg_format(); -#ifdef __APPLE__ - assert_value_none_symbol_sign(p); -#else assert_sign_value_none_symbol(p); -#endif } { Fwt f(LOCALE_fr_FR_UTF_8, 1); std::money_base::pattern p = f.neg_format(); -#ifdef __APPLE__ - assert_value_none_symbol_sign(p); -#else assert_sign_value_none_symbol(p); -#endif } #endif // TEST_HAS_NO_WIDE_CHARACTERS @@ -211,7 +184,7 @@ int main(int, char**) { Fnf f(LOCALE_zh_CN_UTF_8, 1); std::money_base::pattern p = f.neg_format(); -#ifdef _AIX +#if defined(_AIX) || defined(__APPLE__) assert_sign_symbol_none_value(p); #else assert_symbol_sign_none_value(p); @@ -220,7 +193,7 @@ int main(int, char**) { Fnt f(LOCALE_zh_CN_UTF_8, 1); std::money_base::pattern p = f.neg_format(); -#if defined(_WIN32) || defined(__APPLE__) +#if defined(_WIN32) assert_symbol_sign_none_value(p); #else assert_sign_symbol_none_value(p); @@ -230,7 +203,7 @@ int main(int, char**) { Fwf f(LOCALE_zh_CN_UTF_8, 1); std::money_base::pattern p = f.neg_format(); -#ifdef _AIX +#if defined(_AIX) || defined(__APPLE__) assert_sign_symbol_none_value(p); #else assert_symbol_sign_none_value(p); @@ -239,7 +212,7 @@ int main(int, char**) { Fwt f(LOCALE_zh_CN_UTF_8, 1); std::money_base::pattern p = f.neg_format(); -#if defined(_WIN32) || defined(__APPLE__) +#if defined(_WIN32) assert_symbol_sign_none_value(p); #else assert_sign_symbol_none_value(p); diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/pos_format.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/pos_format.pass.cpp index 671620a0c2f9..11832a7d8927 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/pos_format.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/pos_format.pass.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// + // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd @@ -79,14 +79,6 @@ void assert_sign_symbol_none_value(std::money_base::pattern p) assert(p.field[3] == std::money_base::value); } -void assert_value_none_symbol_sign(std::money_base::pattern p) -{ - assert(p.field[0] == std::money_base::value); - assert(p.field[1] == std::money_base::none); - assert(p.field[2] == std::money_base::symbol); - assert(p.field[3] == std::money_base::sign); -} - void assert_sign_value_none_symbol(std::money_base::pattern p) { assert(p.field[0] == std::money_base::sign); diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp index 612d3738a373..31682fea43bc 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp @@ -6,12 +6,10 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // The fix for LWG2381 (https://github.com/llvm/llvm-project/pull/77948) changed behavior of // FP parsing. This requires 3e15c97fa3812993bdc319827a5c6d867b765ae8 in the dylib. -// XFAIL: using-built-library-before-llvm-19 +// TODO: Remove && !darwin once availability markup for LLVM 19 on macOS has been added +// XFAIL: using-built-library-before-llvm-19 && !darwin // <locale> diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp index 58bc9e5abef8..57eedc8633be 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp @@ -6,12 +6,10 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // The fix for LWG2381 (https://github.com/llvm/llvm-project/pull/77948) changed behavior of // FP parsing. This requires 3e15c97fa3812993bdc319827a5c6d867b765ae8 in the dylib. -// XFAIL: using-built-library-before-llvm-19 +// TODO: Remove && !darwin once availability markup for LLVM 19 on macOS has been added +// XFAIL: using-built-library-before-llvm-19 && !darwin // <locale> diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp index bf8bb651d6bc..8324ee317014 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp @@ -6,12 +6,10 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // The fix for LWG2381 (https://github.com/llvm/llvm-project/pull/77948) changed behavior of // FP parsing. This requires 3e15c97fa3812993bdc319827a5c6d867b765ae8 in the dylib. -// XFAIL: using-built-library-before-llvm-19 +// TODO: Remove && !darwin once availability markup for LLVM 19 on macOS has been added +// XFAIL: using-built-library-before-llvm-19 && !darwin // <locale> diff --git a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/grouping.pass.cpp b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/grouping.pass.cpp index a87c5e0ace28..11ec75469c70 100644 --- a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/grouping.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/grouping.pass.cpp @@ -5,10 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - +// // NetBSD does not support LC_NUMERIC at the moment // XFAIL: netbsd @@ -63,7 +60,7 @@ int main(int, char**) } { std::locale l(LOCALE_fr_FR_UTF_8); -#if defined(TEST_HAS_GLIBC) || defined(_WIN32) || defined(_AIX) +#if defined(TEST_HAS_GLIBC) || defined(_WIN32) || defined(_AIX) || defined(__APPLE__) const char* const group = "\3"; #else const char* const group = "\x7f"; diff --git a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp index ef39e8aa7b68..53f2c8554f3d 100644 --- a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // NetBSD does not support LC_NUMERIC at the moment // XFAIL: netbsd @@ -69,7 +66,7 @@ int main(int, char**) // The below tests work around GLIBC's use of U202F as LC_NUMERIC thousands_sep. std::locale l(LOCALE_fr_FR_UTF_8); { -#if defined(_CS_GNU_LIBC_VERSION) || defined(_WIN32) || defined(_AIX) +#if defined(_CS_GNU_LIBC_VERSION) || defined(_WIN32) || defined(_AIX) || defined(__APPLE__) const char sep = ' '; #else const char sep = ','; diff --git a/libcxx/test/std/strings/basic.string/string.capacity/deallocate_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/deallocate_size.pass.cpp index 00f9e2b84678..ecdc39701641 100644 --- a/libcxx/test/std/strings/basic.string/string.capacity/deallocate_size.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.capacity/deallocate_size.pass.cpp @@ -12,12 +12,14 @@ #include <string> #include <cassert> +#include <cstddef> #include <cstdint> #include <type_traits> #include "test_macros.h" -static int allocated_; +static std::uint64_t allocated_; +static std::uint64_t deallocated_; template <class T, class Sz> struct test_alloc { @@ -41,12 +43,12 @@ struct test_alloc { pointer allocate(size_type n, const void* = nullptr) { allocated_ += n; - return std::allocator<value_type>().allocate(n); + return std::allocator<value_type>().allocate(static_cast<std::size_t>(n)); } void deallocate(pointer p, size_type s) { - allocated_ -= s; - std::allocator<value_type>().deallocate(p, s); + deallocated_ += s; + std::allocator<value_type>().deallocate(p, static_cast<std::size_t>(s)); } template <class U> @@ -64,14 +66,13 @@ struct test_alloc { template <class Sz> void test() { - for (int i = 1; i < 1000; ++i) { - using Str = std::basic_string<char, std::char_traits<char>, test_alloc<char, Sz> >; + for (unsigned int i = 1; i < 1000; ++i) { { - Str s(i, 't'); - assert(allocated_ == 0 || allocated_ >= i); + std::basic_string<char, std::char_traits<char>, test_alloc<char, Sz> > s(i, 't'); + (void)s; } + assert(allocated_ == deallocated_); } - assert(allocated_ == 0); } int main(int, char**) { diff --git a/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp b/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp index 4e84db9a84d7..97ac04275b0b 100644 --- a/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp +++ b/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: no-localization // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME @@ -83,17 +80,10 @@ static void test_values() { assert(stream_c_locale<CharT>(1'000.123456s) == SV("1000.1235s")); if constexpr (std::same_as<CharT, char>) { -#if defined(__APPLE__) - assert(stream_fr_FR_locale<CharT>(-1'000'000s) == SV("-1000000s")); - assert(stream_fr_FR_locale<CharT>(1'000'000s) == SV("1000000s")); - assert(stream_fr_FR_locale<CharT>(-1'000.123456s) == SV("-1000,1235s")); - assert(stream_fr_FR_locale<CharT>(1'000.123456s) == SV("1000,1235s")); -#else assert(stream_fr_FR_locale<CharT>(-1'000'000s) == SV("-1 000 000s")); assert(stream_fr_FR_locale<CharT>(1'000'000s) == SV("1 000 000s")); assert(stream_fr_FR_locale<CharT>(-1'000.123456s) == SV("-1 000,1235s")); assert(stream_fr_FR_locale<CharT>(1'000.123456s) == SV("1 000,1235s")); -#endif } else { #ifndef TEST_HAS_NO_WIDE_CHARACTERS assert(stream_fr_FR_locale<CharT>(-1'000'000s) == L"-1" FR_THOU_SEP "000" FR_THOU_SEP "000s"); diff --git a/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp b/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp index 973bce8f81d4..f1f7debed246 100644 --- a/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: no-localization // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME @@ -408,19 +405,11 @@ static void test_valid_positive_integral_values() { "%OM='00'\t" "%S='00'\t" "%OS='00'\t" -# if defined(__APPLE__) - "%p='AM'\t" -# else "%p='午前'\t" -# endif "%R='00:00'\t" "%T='00:00:00'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='12:00:00 AM'\t" -# else "%r='12:00:00 午前'\t" -# endif "%X='00時00分00秒'\t" "%EX='00時00分00秒'\t" # elif defined(_WIN32) @@ -448,19 +437,11 @@ static void test_valid_positive_integral_values() { "%OM='59'\t" "%S='59'\t" "%OS='59'\t" -# if defined(__APPLE__) - "%p='AM'\t" -# else "%p='午前'\t" -# endif "%R='11:59'\t" "%T='11:59:59'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='11:59:59 AM'\t" -# else "%r='11:59:59 午前'\t" -# endif "%X='11時59分59秒'\t" "%EX='11時59分59秒'\t" # elif defined(_WIN32) @@ -488,19 +469,11 @@ static void test_valid_positive_integral_values() { "%OM='00'\t" "%S='00'\t" "%OS='00'\t" -# if defined(__APPLE__) - "%p='PM'\t" -# else "%p='午後'\t" -# endif "%R='12:00'\t" "%T='12:00:00'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='12:00:00 PM'\t" -# else "%r='12:00:00 午後'\t" -# endif "%X='12時00分00秒'\t" "%EX='12時00分00秒'\t" # else @@ -528,19 +501,11 @@ static void test_valid_positive_integral_values() { "%OM='59'\t" "%S='59'\t" "%OS='59'\t" -# if defined(__APPLE__) - "%p='PM'\t" -# else "%p='午後'\t" -# endif "%R='23:59'\t" "%T='23:59:59'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='11:59:59 PM'\t" -# else "%r='11:59:59 午後'\t" -# endif "%X='23時59分59秒'\t" "%EX='23時59分59秒'\t" # else @@ -568,19 +533,11 @@ static void test_valid_positive_integral_values() { "%OM='00'\t" "%S='00'\t" "%OS='00'\t" -# if defined(__APPLE__) - "%p='AM'\t" -# else "%p='午前'\t" -# endif "%R='00:00'\t" "%T='00:00:00'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='12:00:00 AM'\t" -# else "%r='12:00:00 午前'\t" -# endif "%X='00時00分00秒'\t" "%EX='00時00分00秒'\t" # elif defined(_WIN32) @@ -835,19 +792,11 @@ static void test_valid_negative_integral_values() { "%OM='59'\t" "%S='59'\t" "%OS='59'\t" -# if defined(__APPLE__) - "%p='PM'\t" -# else "%p='午後'\t" -# endif "%R='23:59'\t" "%T='23:59:59'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='11:59:59 PM'\t" -# else "%r='11:59:59 午後'\t" -# endif "%X='23時59分59秒'\t" "%EX='23時59分59秒'\t" # elif defined(_WIN32) diff --git a/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp b/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp index 28a972b19dce..e258c4161eda 100644 --- a/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: no-localization // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME @@ -695,19 +692,11 @@ static void test_valid_values_time() { "%OM='00'\t" "%S='00'\t" "%OS='00'\t" -# if defined(__APPLE__) - "%p='AM'\t" -# else "%p='午前'\t" -# endif "%R='00:00'\t" "%T='00:00:00'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='12:00:00 AM'\t" -# else "%r='12:00:00 午前'\t" -# endif "%X='00時00分00秒'\t" "%EX='00時00分00秒'\t" # elif defined(_WIN32) @@ -732,19 +721,11 @@ static void test_valid_values_time() { "%OM='31'\t" "%S='30.123'\t" "%OS='30.123'\t" -# if defined(__APPLE__) - "%p='PM'\t" -# else "%p='午後'\t" -# endif "%R='23:31'\t" "%T='23:31:30.123'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='11:31:30 PM'\t" -# else "%r='11:31:30 午後'\t" -# endif "%X='23時31分30秒'\t" "%EX='23時31分30秒'\t" # elif defined(_WIN32) diff --git a/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp b/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp index 82d9b4c7540a..bbd9c074bef2 100644 --- a/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: no-localization // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME @@ -302,19 +299,11 @@ static void test_valid_values() { "%OM='00'\t" "%S='00'\t" "%OS='00'\t" -# if defined(__APPLE__) - "%p='AM'\t" -# else "%p='午前'\t" -# endif "%R='00:00'\t" "%T='00:00:00'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='12:00:00 AM'\t" -# else "%r='12:00:00 午前'\t" -# endif "%X='00時00分00秒'\t" "%EX='00時00分00秒'\t" # elif defined(_WIN32) @@ -339,19 +328,11 @@ static void test_valid_values() { "%OM='31'\t" "%S='30.123'\t" "%OS='30.123'\t" -# if defined(__APPLE__) - "%p='PM'\t" -# else "%p='午後'\t" -# endif "%R='23:31'\t" "%T='23:31:30.123'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='11:31:30 PM'\t" -# else "%r='11:31:30 午後'\t" -# endif "%X='23時31分30秒'\t" "%EX='23時31分30秒'\t" # elif defined(_WIN32) @@ -376,19 +357,11 @@ static void test_valid_values() { "%OM='02'\t" "%S='01.123456789012'\t" "%OS='01.123456789012'\t" -# if defined(__APPLE__) - "%p='AM'\t" -# else "%p='午前'\t" -# endif "%R='03:02'\t" "%T='03:02:01.123456789012'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='03:02:01 AM'\t" -# else "%r='03:02:01 午前'\t" -# endif "%X='03時02分01秒'\t" "%EX='03時02分01秒'\t" # elif defined(_WIN32) @@ -413,19 +386,11 @@ static void test_valid_values() { "%OM='01'\t" "%S='01'\t" "%OS='01'\t" -# if defined(__APPLE__) - "%p='AM'\t" -# else "%p='午前'\t" -# endif "%R='01:01'\t" "%T='01:01:01'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='01:01:01 AM'\t" -# else "%r='01:01:01 午前'\t" -# endif "%X='01時01分01秒'\t" "%EX='01時01分01秒'\t" # elif defined(_WIN32) diff --git a/libcxx/test/std/time/time.syn/formatter.local_time.pass.cpp b/libcxx/test/std/time/time.syn/formatter.local_time.pass.cpp index bd23337ccb31..ce3af8ec199a 100644 --- a/libcxx/test/std/time/time.syn/formatter.local_time.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.local_time.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: no-localization // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME @@ -694,19 +691,11 @@ static void test_valid_values_time() { "%OM='00'\t" "%S='00'\t" "%OS='00'\t" -# if defined(__APPLE__) - "%p='AM'\t" -# else "%p='午前'\t" -# endif "%R='00:00'\t" "%T='00:00:00'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='12:00:00 AM'\t" -# else "%r='12:00:00 午前'\t" -# endif "%X='00時00分00秒'\t" "%EX='00時00分00秒'\t" # elif defined(_WIN32) @@ -731,19 +720,11 @@ static void test_valid_values_time() { "%OM='31'\t" "%S='30.123'\t" "%OS='30.123'\t" -# if defined(__APPLE__) - "%p='PM'\t" -# else "%p='午後'\t" -# endif "%R='23:31'\t" "%T='23:31:30.123'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='11:31:30 PM'\t" -# else "%r='11:31:30 午後'\t" -# endif "%X='23時31分30秒'\t" "%EX='23時31分30秒'\t" # elif defined(_WIN32) diff --git a/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp b/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp index 9c9c8e0de1e9..9238f3daf1f8 100644 --- a/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: no-localization // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME @@ -691,19 +688,11 @@ static void test_valid_values_time() { "%OM='00'\t" "%S='00'\t" "%OS='00'\t" -# if defined(__APPLE__) - "%p='AM'\t" -# else "%p='午前'\t" -# endif "%R='00:00'\t" "%T='00:00:00'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='12:00:00 AM'\t" -# else "%r='12:00:00 午前'\t" -# endif "%X='00時00分00秒'\t" "%EX='00時00分00秒'\t" # elif defined(_WIN32) @@ -728,19 +717,11 @@ static void test_valid_values_time() { "%OM='31'\t" "%S='30.123'\t" "%OS='30.123'\t" -# if defined(__APPLE__) - "%p='PM'\t" -# else "%p='午後'\t" -# endif "%R='23:31'\t" "%T='23:31:30.123'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='11:31:30 PM'\t" -# else "%r='11:31:30 午後'\t" -# endif "%X='23時31分30秒'\t" "%EX='23時31分30秒'\t" # elif defined(_WIN32) diff --git a/libcxx/test/support/locale_helpers.h b/libcxx/test/support/locale_helpers.h index 946c2fed0f3a..3cec7397e3d7 100644 --- a/libcxx/test/support/locale_helpers.h +++ b/libcxx/test/support/locale_helpers.h @@ -73,6 +73,12 @@ MultiStringType currency_symbol_ru_RU() { return MKSTR("\u20BD"); // U+20BD RUBLE SIGN #elif defined(_WIN32) || defined(__FreeBSD__) || defined(_AIX) return MKSTR("\u20BD"); // U+20BD RUBLE SIGN +#elif defined(__APPLE__) + if (__builtin_available(macOS 15.4, *)) { + return MKSTR("\u20BD"); // U+20BD RUBLE SIGN + } else { + return MKSTR("\u0440\u0443\u0431."); + } #else return MKSTR("\u0440\u0443\u0431."); #endif @@ -81,6 +87,12 @@ MultiStringType currency_symbol_ru_RU() { MultiStringType currency_symbol_zh_CN() { #if defined(_WIN32) return MKSTR("\u00A5"); // U+00A5 YEN SIGN +#elif defined(__APPLE__) + if (__builtin_available(macOS 15.4, *)) { + return MKSTR("\u00A5"); // U+00A5 YEN SIGN + } else { + return MKSTR("\uFFE5"); // U+FFE5 FULLWIDTH YEN SIGN + } #else return MKSTR("\uFFE5"); // U+FFE5 FULLWIDTH YEN SIGN #endif diff --git a/libcxxabi/test/uncaught_exception.pass.cpp b/libcxxabi/test/uncaught_exception.pass.cpp index 8e8468c43240..e97732006e11 100644 --- a/libcxxabi/test/uncaught_exception.pass.cpp +++ b/libcxxabi/test/uncaught_exception.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // UNSUPPORTED: no-exceptions // This tests that libc++abi still provides __cxa_uncaught_exception() for @@ -18,7 +15,8 @@ // to undefined symbols when linking against a libc++ that re-exports the symbols, // but running against a libc++ that doesn't. Fortunately, usage of __cxa_uncaught_exception() // in the wild seems to be close to non-existent. -// XFAIL: using-built-library-before-llvm-19 +// TODO: Remove && !darwin once availability markup for LLVM 19 on macOS has been added +// XFAIL: using-built-library-before-llvm-19 && !darwin #include <cxxabi.h> #include <cassert> diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index bbf4b29a9fda..a4150ebfa165 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -2749,14 +2749,13 @@ RelroPaddingSection::RelroPaddingSection(Ctx &ctx) : SyntheticSection(ctx, ".relro_padding", SHT_NOBITS, SHF_ALLOC | SHF_WRITE, 1) {} -RandomizePaddingSection::RandomizePaddingSection(Ctx &ctx, uint64_t size, - OutputSection *parent) - : SyntheticSection(ctx, ".randomize_padding", SHT_PROGBITS, SHF_ALLOC, 1), +PaddingSection::PaddingSection(Ctx &ctx, uint64_t size, OutputSection *parent) + : SyntheticSection(ctx, ".padding", SHT_PROGBITS, SHF_ALLOC, 1), size(size) { this->parent = parent; } -void RandomizePaddingSection::writeTo(uint8_t *buf) { +void PaddingSection::writeTo(uint8_t *buf) { std::array<uint8_t, 4> filler = getParent()->getFiller(ctx); uint8_t *end = buf + size; for (; buf + 4 <= end; buf += 4) diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index ac3ec63f0a7a..38e68110e4bc 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -779,11 +779,11 @@ public: void writeTo(uint8_t *buf) override {} }; -class RandomizePaddingSection final : public SyntheticSection { +class PaddingSection final : public SyntheticSection { uint64_t size; public: - RandomizePaddingSection(Ctx &ctx, uint64_t size, OutputSection *parent); + PaddingSection(Ctx &ctx, uint64_t size, OutputSection *parent); size_t getSize() const override { return size; } void writeTo(uint8_t *buf) override; }; diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 4fa80397cbfa..083b4fb1dbd2 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1495,15 +1495,14 @@ static void randomizeSectionPadding(Ctx &ctx) { if (auto *isd = dyn_cast<InputSectionDescription>(bc)) { SmallVector<InputSection *, 0> tmp; if (os->ptLoad != curPtLoad) { - tmp.push_back(make<RandomizePaddingSection>( - ctx, g() % ctx.arg.maxPageSize, os)); + tmp.push_back( + make<PaddingSection>(ctx, g() % ctx.arg.maxPageSize, os)); curPtLoad = os->ptLoad; } for (InputSection *isec : isd->sections) { // Probability of inserting padding is 1 in 16. if (g() % 16 == 0) - tmp.push_back( - make<RandomizePaddingSection>(ctx, isec->addralign, os)); + tmp.push_back(make<PaddingSection>(ctx, isec->addralign, os)); tmp.push_back(isec); } isd->sections = std::move(tmp); diff --git a/lldb/bindings/lua/lua-typemaps.swig b/lldb/bindings/lua/lua-typemaps.swig index 56756936a532..f2a740141936 100644 --- a/lldb/bindings/lua/lua-typemaps.swig +++ b/lldb/bindings/lua/lua-typemaps.swig @@ -121,9 +121,27 @@ LLDB_NUMBER_TYPEMAP(enum SWIGTYPE); $1 = (char *)malloc($2); } +// Disable default type checking for this method to avoid SWIG dispatch issues. +// +// Problem: SBThread::GetStopDescription has two overloads: +// 1. GetStopDescription(char* dst_or_null, size_t dst_len) +// 2. GetStopDescription(lldb::SBStream& stream) +// +// SWIG generates a dispatch function to select the correct overload based on argument types. +// see https://www.swig.org/Doc4.0/SWIGDocumentation.html#Typemaps_overloading. +// However, this dispatcher doesn't consider typemaps that transform function signatures. +// +// In lua, our typemap converts GetStopDescription(char*, size_t) to GetStopDescription(int). +// The dispatcher still checks against the original (char*, size_t) signature instead of +// the transformed (int) signature, causing type matching to fail. +// This only affects SBThread::GetStopDescription since the type check also matches +// the argument name, which is unique to this function. +%typemap(typecheck, precedence=SWIG_TYPECHECK_POINTER) (char *dst_or_null, size_t dst_len) "" + %typemap(argout) (char *dst_or_null, size_t dst_len) { lua_pop(L, 1); // Blow away the previous result - lua_pushlstring(L, (const char *)$1, $result); + llvm::StringRef ref($1); + lua_pushlstring(L, (const char *)$1, ref.size()); free($1); // SWIG_arg was already incremented } diff --git a/lldb/bindings/python/python-typemaps.swig b/lldb/bindings/python/python-typemaps.swig index 715914fe745f..4d3a95768f2f 100644 --- a/lldb/bindings/python/python-typemaps.swig +++ b/lldb/bindings/python/python-typemaps.swig @@ -224,6 +224,24 @@ AND call SWIG_fail at the same time, because it will result in a double free. } $1 = (char *)malloc($2); } + +// Disable default type checking for this method to avoid SWIG dispatch issues. +// +// Problem: SBThread::GetStopDescription has two overloads: +// 1. GetStopDescription(char* dst_or_null, size_t dst_len) +// 2. GetStopDescription(lldb::SBStream& stream) +// +// SWIG generates a dispatch function to select the correct overload based on argument types. +// see https://www.swig.org/Doc4.0/SWIGDocumentation.html#Typemaps_overloading. +// However, this dispatcher doesn't consider typemaps that transform function signatures. +// +// In Python, our typemap converts GetStopDescription(char*, size_t) to GetStopDescription(int). +// The dispatcher still checks against the original (char*, size_t) signature instead of +// the transformed (int) signature, causing type matching to fail. +// This only affects SBThread::GetStopDescription since the type check also matches +// the argument name, which is unique to this function. +%typemap(typecheck, precedence=SWIG_TYPECHECK_POINTER) (char *dst_or_null, size_t dst_len) "" + %typemap(argout) (char *dst_or_null, size_t dst_len) { Py_XDECREF($result); /* Blow away any previous result */ llvm::StringRef ref($1); diff --git a/lldb/include/lldb/API/SBThread.h b/lldb/include/lldb/API/SBThread.h index e9fe5858d125..2411dfd37651 100644 --- a/lldb/include/lldb/API/SBThread.h +++ b/lldb/include/lldb/API/SBThread.h @@ -81,6 +81,14 @@ public: SBThreadCollection GetStopReasonExtendedBacktraces(InstrumentationRuntimeType type); + /// Gets a human-readable description of why the thread stopped. + /// + /// \param stream Output stream to receive the stop description text + /// \return + /// true if obtained and written to the stream, + // false if there was an error retrieving the description. + bool GetStopDescription(lldb::SBStream &stream) const; + size_t GetStopDescription(char *dst_or_null, size_t dst_len); SBValue GetStopReturnValue(); diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h index 8f5892e16ced..c1f9785e76f9 100644 --- a/lldb/include/lldb/Target/Process.h +++ b/lldb/include/lldb/Target/Process.h @@ -127,10 +127,7 @@ class ProcessAttachInfo : public ProcessInstanceInfo { public: ProcessAttachInfo() = default; - ProcessAttachInfo(const ProcessLaunchInfo &launch_info) - : m_resume_count(0), m_wait_for_launch(false), m_ignore_existing(true), - m_continue_once_attached(false), m_detach_on_error(true), - m_async(false) { + ProcessAttachInfo(const ProcessLaunchInfo &launch_info) { ProcessInfo::operator=(launch_info); SetProcessPluginName(launch_info.GetProcessPluginName()); SetResumeCount(launch_info.GetResumeCount()); diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py index d892c01f0bc7..8f3652172dfd 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py @@ -10,8 +10,8 @@ import string import subprocess import signal import sys -import threading import warnings +import selectors import time from typing import ( Any, @@ -139,35 +139,6 @@ def dump_memory(base_addr, data, num_per_line, outfile): outfile.write("\n") -def read_packet( - f: IO[bytes], trace_file: Optional[IO[str]] = None -) -> Optional[ProtocolMessage]: - """Decode a JSON packet that starts with the content length and is - followed by the JSON bytes from a file 'f'. Returns None on EOF. - """ - line = f.readline().decode("utf-8") - if len(line) == 0: - return None # EOF. - - # Watch for line that starts with the prefix - prefix = "Content-Length: " - if line.startswith(prefix): - # Decode length of JSON bytes - length = int(line[len(prefix) :]) - # Skip empty line - separator = f.readline().decode() - if separator != "": - Exception("malformed DAP content header, unexpected line: " + separator) - # Read JSON bytes - json_str = f.read(length).decode() - if trace_file: - trace_file.write("from adapter:\n%s\n" % (json_str)) - # Decode the JSON bytes into a python dictionary - return json.loads(json_str) - - raise Exception("unexpected malformed message from lldb-dap: " + line) - - def packet_type_is(packet, packet_type): return "type" in packet and packet["type"] == packet_type @@ -199,16 +170,8 @@ class DebugCommunication(object): self.log_file = log_file self.send = send self.recv = recv - - # Packets that have been received and processed but have not yet been - # requested by a test case. - self._pending_packets: List[Optional[ProtocolMessage]] = [] - # Received packets that have not yet been processed. - self._recv_packets: List[Optional[ProtocolMessage]] = [] - # Used as a mutex for _recv_packets and for notify when _recv_packets - # changes. - self._recv_condition = threading.Condition() - self._recv_thread = threading.Thread(target=self._read_packet_thread) + self.selector = selectors.DefaultSelector() + self.selector.register(recv, selectors.EVENT_READ) # session state self.init_commands = init_commands @@ -234,9 +197,6 @@ class DebugCommunication(object): # keyed by breakpoint id self.resolved_breakpoints: dict[str, Breakpoint] = {} - # trigger enqueue thread - self._recv_thread.start() - @classmethod def encode_content(cls, s: str) -> bytes: return ("Content-Length: %u\r\n\r\n%s" % (len(s), s)).encode("utf-8") @@ -252,17 +212,46 @@ class DebugCommunication(object): f"seq mismatch in response {command['seq']} != {response['request_seq']}" ) - def _read_packet_thread(self): - try: - while True: - packet = read_packet(self.recv, trace_file=self.trace_file) - # `packet` will be `None` on EOF. We want to pass it down to - # handle_recv_packet anyway so the main thread can handle unexpected - # termination of lldb-dap and stop waiting for new packets. - if not self._handle_recv_packet(packet): - break - finally: - dump_dap_log(self.log_file) + def _read_packet( + self, + timeout: float = DEFAULT_TIMEOUT, + ) -> Optional[ProtocolMessage]: + """Decode a JSON packet that starts with the content length and is + followed by the JSON bytes from self.recv. Returns None on EOF. + """ + + ready = self.selector.select(timeout) + if not ready: + warnings.warn( + "timeout occurred waiting for a packet, check if the test has a" + " negative assertion and see if it can be inverted.", + stacklevel=4, + ) + return None # timeout + + line = self.recv.readline().decode("utf-8") + if len(line) == 0: + return None # EOF. + + # Watch for line that starts with the prefix + prefix = "Content-Length: " + if line.startswith(prefix): + # Decode length of JSON bytes + length = int(line[len(prefix) :]) + # Skip empty line + separator = self.recv.readline().decode() + if separator != "": + Exception("malformed DAP content header, unexpected line: " + separator) + # Read JSON bytes + json_str = self.recv.read(length).decode() + if self.trace_file: + self.trace_file.write( + "%s from adapter:\n%s\n" % (time.time(), json_str) + ) + # Decode the JSON bytes into a python dictionary + return json.loads(json_str) + + raise Exception("unexpected malformed message from lldb-dap: " + line) def get_modules( self, start_module: Optional[int] = None, module_count: Optional[int] = None @@ -310,34 +299,6 @@ class DebugCommunication(object): output += self.get_output(category, clear=clear) return output - def _enqueue_recv_packet(self, packet: Optional[ProtocolMessage]): - with self.recv_condition: - self.recv_packets.append(packet) - self.recv_condition.notify() - - def _handle_recv_packet(self, packet: Optional[ProtocolMessage]) -> bool: - """Handles an incoming packet. - - Called by the read thread that is waiting for all incoming packets - to store the incoming packet in "self._recv_packets" in a thread safe - way. This function will then signal the "self._recv_condition" to - indicate a new packet is available. - - Args: - packet: A new packet to store. - - Returns: - True if the caller should keep calling this function for more - packets. - """ - with self._recv_condition: - self._recv_packets.append(packet) - self._recv_condition.notify() - # packet is None on EOF - return packet is not None and not ( - packet["type"] == "response" and packet["command"] == "disconnect" - ) - def _recv_packet( self, *, @@ -361,46 +322,34 @@ class DebugCommunication(object): The first matching packet for the given predicate, if specified, otherwise None. """ - assert ( - threading.current_thread != self._recv_thread - ), "Must not be called from the _recv_thread" - - def process_until_match(): - self._process_recv_packets() - for i, packet in enumerate(self._pending_packets): - if packet is None: - # We need to return a truthy value to break out of the - # wait_for, use `EOFError` as an indicator of EOF. - return EOFError() - if predicate and predicate(packet): - self._pending_packets.pop(i) - return packet - - with self._recv_condition: - packet = self._recv_condition.wait_for(process_until_match, timeout) - return None if isinstance(packet, EOFError) else packet - - def _process_recv_packets(self) -> None: + deadline = time.time() + timeout + + while time.time() < deadline: + packet = self._read_packet(timeout=deadline - time.time()) + if packet is None: + return None + self._process_recv_packet(packet) + if not predicate or predicate(packet): + return packet + + def _process_recv_packet(self, packet) -> None: """Process received packets, updating the session state.""" - with self._recv_condition: - for packet in self._recv_packets: - if packet and ("seq" not in packet or packet["seq"] == 0): - warnings.warn( - f"received a malformed packet, expected 'seq != 0' for {packet!r}" - ) - # Handle events that may modify any stateful properties of - # the DAP session. - if packet and packet["type"] == "event": - self._handle_event(packet) - elif packet and packet["type"] == "request": - # Handle reverse requests and keep processing. - self._handle_reverse_request(packet) - # Move the packet to the pending queue. - self._pending_packets.append(packet) - self._recv_packets.clear() + if packet and ("seq" not in packet or packet["seq"] == 0): + warnings.warn( + f"received a malformed packet, expected 'seq != 0' for {packet!r}" + ) + # Handle events that may modify any stateful properties of + # the DAP session. + if packet and packet["type"] == "event": + self._handle_event(packet) + elif packet and packet["type"] == "request": + # Handle reverse requests and keep processing. + self._handle_reverse_request(packet) def _handle_event(self, packet: Event) -> None: """Handle any events that modify debug session state we track.""" + self.events.append(packet) + event = packet["event"] body: Optional[Dict] = packet.get("body", None) @@ -453,6 +402,8 @@ class DebugCommunication(object): self.invalidated_event = packet elif event == "memory": self.memory_event = packet + elif event == "module": + self.module_events.append(packet) def _handle_reverse_request(self, request: Request) -> None: if request in self.reverse_requests: @@ -521,18 +472,14 @@ class DebugCommunication(object): Returns the seq number of the request. """ - # Set the seq for requests. - if packet["type"] == "request": - packet["seq"] = self.sequence - self.sequence += 1 - else: - packet["seq"] = 0 + packet["seq"] = self.sequence + self.sequence += 1 # Encode our command dictionary as a JSON string json_str = json.dumps(packet, separators=(",", ":")) if self.trace_file: - self.trace_file.write("to adapter:\n%s\n" % (json_str)) + self.trace_file.write("%s to adapter:\n%s\n" % (time.time(), json_str)) length = len(json_str) if length > 0: @@ -913,6 +860,8 @@ class DebugCommunication(object): if restartArguments: command_dict["arguments"] = restartArguments + # Clear state, the process is about to restart... + self._process_continued(True) response = self._send_recv(command_dict) # Caller must still call wait_for_stopped. return response @@ -1479,8 +1428,10 @@ class DebugCommunication(object): def terminate(self): self.send.close() - if self._recv_thread.is_alive(): - self._recv_thread.join() + self.recv.close() + self.selector.close() + if self.log_file: + dump_dap_log(self.log_file) def request_setInstructionBreakpoints(self, memory_reference=[]): breakpoints = [] @@ -1577,6 +1528,7 @@ class DebugAdapterServer(DebugCommunication): stdout=subprocess.PIPE, stderr=sys.stderr, env=adapter_env, + bufsize=0, ) if connection is None: diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py index 29935bb8046f..405e91fc2dc3 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py @@ -15,6 +15,8 @@ import base64 # DAP tests as a whole have been flakey on the Windows on Arm bot. See: # https://github.com/llvm/llvm-project/issues/137660 @skipIf(oslist=["windows"], archs=["aarch64"]) +# The Arm Linux bot needs stable resources before it can run these tests reliably. +@skipIf(oslist=["linux"], archs=["arm$"]) class DAPTestCaseBase(TestBase): # set timeout based on whether ASAN was enabled or not. Increase # timeout by a factor of 10 if ASAN is enabled. @@ -416,7 +418,7 @@ class DAPTestCaseBase(TestBase): return self.dap_server.wait_for_stopped() def continue_to_breakpoint(self, breakpoint_id: str): - self.continue_to_breakpoints((breakpoint_id)) + self.continue_to_breakpoints([breakpoint_id]) def continue_to_breakpoints(self, breakpoint_ids): self.do_continue() diff --git a/lldb/source/API/SBThread.cpp b/lldb/source/API/SBThread.cpp index 4e4aa48bc9a2..f58a1b52afa9 100644 --- a/lldb/source/API/SBThread.cpp +++ b/lldb/source/API/SBThread.cpp @@ -239,11 +239,34 @@ SBThread::GetStopReasonExtendedBacktraces(InstrumentationRuntimeType type) { return threads; } -size_t SBThread::GetStopDescription(char *dst, size_t dst_len) { - LLDB_INSTRUMENT_VA(this, dst, dst_len); +bool SBThread::GetStopDescription(lldb::SBStream &stream) const { + LLDB_INSTRUMENT_VA(this, stream); + + if (!m_opaque_sp) + return false; + + llvm::Expected<StoppedExecutionContext> exe_ctx = + GetStoppedExecutionContext(m_opaque_sp); + if (!exe_ctx) { + LLDB_LOG_ERROR(GetLog(LLDBLog::API), exe_ctx.takeError(), "{0}"); + return false; + } + + if (!exe_ctx->HasThreadScope()) + return false; + + Stream &strm = stream.ref(); + const std::string stop_desc = exe_ctx->GetThreadPtr()->GetStopDescription(); + strm.PutCString(stop_desc); + + return true; +} + +size_t SBThread::GetStopDescription(char *dst_or_null, size_t dst_len) { + LLDB_INSTRUMENT_VA(this, dst_or_null, dst_len); - if (dst) - *dst = 0; + if (dst_or_null) + *dst_or_null = 0; llvm::Expected<StoppedExecutionContext> exe_ctx = GetStoppedExecutionContext(m_opaque_sp); @@ -259,8 +282,8 @@ size_t SBThread::GetStopDescription(char *dst, size_t dst_len) { if (thread_stop_desc.empty()) return 0; - if (dst) - return ::snprintf(dst, dst_len, "%s", thread_stop_desc.c_str()) + 1; + if (dst_or_null) + return ::snprintf(dst_or_null, dst_len, "%s", thread_stop_desc.c_str()) + 1; // NULL dst passed in, return the length needed to contain the // description. diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 9cdb8467bfc6..c8e520d687f6 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -1674,6 +1674,10 @@ void ObjectFileMachO::ProcessSegmentCommand( uint32_t segment_sect_idx; const lldb::user_id_t first_segment_sectID = context.NextSectionIdx + 1; + // 64 bit mach-o files have sections with 32 bit file offsets. If any section + // data end will exceed UINT32_MAX, then we need to do some bookkeeping to + // ensure we can access this data correctly. + uint64_t section_offset_adjust = 0; const uint32_t num_u32s = load_cmd.cmd == LC_SEGMENT ? 7 : 8; for (segment_sect_idx = 0; segment_sect_idx < load_cmd.nsects; ++segment_sect_idx) { @@ -1697,6 +1701,16 @@ void ObjectFileMachO::ProcessSegmentCommand( // isn't stored in the abstracted Sections. m_mach_sections.push_back(sect64); + // Make sure we can load sections in mach-o files where some sections cross + // a 4GB boundary. llvm::MachO::section_64 have only 32 bit file offsets + // for the file offset of the section contents, so we need to track and + // sections that overflow and adjust the offsets accordingly. + const uint64_t section_file_offset = + (uint64_t)sect64.offset + section_offset_adjust; + const uint64_t end_section_offset = (uint64_t)sect64.offset + sect64.size; + if (end_section_offset >= UINT32_MAX) + section_offset_adjust += end_section_offset & 0xFFFFFFFF00000000ull; + if (add_section) { ConstString section_name( sect64.sectname, strnlen(sect64.sectname, sizeof(sect64.sectname))); @@ -1736,13 +1750,13 @@ void ObjectFileMachO::ProcessSegmentCommand( } // Grow the section size as needed. - if (sect64.offset) { + if (section_file_offset) { const lldb::addr_t segment_min_file_offset = segment->GetFileOffset(); const lldb::addr_t segment_max_file_offset = segment_min_file_offset + segment->GetFileSize(); - const lldb::addr_t section_min_file_offset = sect64.offset; + const lldb::addr_t section_min_file_offset = section_file_offset; const lldb::addr_t section_max_file_offset = section_min_file_offset + sect64.size; const lldb::addr_t new_file_offset = @@ -1769,10 +1783,10 @@ void ObjectFileMachO::ProcessSegmentCommand( // other sections. sect64.addr, // File VM address == addresses as they are // found in the object file - sect64.size, // VM size in bytes of this section - sect64.offset, // Offset to the data for this section in + sect64.size, // VM size in bytes of this section + section_file_offset, // Offset to the data for this section in // the file - sect64.offset ? sect64.size : 0, // Size in bytes of + section_file_offset ? sect64.size : 0, // Size in bytes of // this section as // found in the file sect64.align, @@ -1792,14 +1806,14 @@ void ObjectFileMachO::ProcessSegmentCommand( SectionSP section_sp(new Section( segment_sp, module_sp, this, ++context.NextSectionIdx, section_name, sect_type, sect64.addr - segment_sp->GetFileAddress(), sect64.size, - sect64.offset, sect64.offset == 0 ? 0 : sect64.size, sect64.align, - sect64.flags)); + section_file_offset, section_file_offset == 0 ? 0 : sect64.size, + sect64.align, sect64.flags)); // Set the section to be encrypted to match the segment bool section_is_encrypted = false; if (!segment_is_encrypted && load_cmd.filesize != 0) section_is_encrypted = context.EncryptedRanges.FindEntryThatContains( - sect64.offset) != nullptr; + section_file_offset) != nullptr; section_sp->SetIsEncrypted(segment_is_encrypted || section_is_encrypted); section_sp->SetPermissions(segment_permissions); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 36bc17680f3f..c049829f3721 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -450,6 +450,10 @@ ParsedDWARFTypeAttributes::ParsedDWARFTypeAttributes(const DWARFDIE &die) { byte_size = form_value.Unsigned(); break; + case DW_AT_bit_size: + data_bit_size = form_value.Unsigned(); + break; + case DW_AT_alignment: alignment = form_value.Unsigned(); break; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index da58f4c14622..f5f707129d67 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -574,6 +574,7 @@ struct ParsedDWARFTypeAttributes { lldb_private::plugin::dwarf::DWARFFormValue type; lldb::LanguageType class_language = lldb::eLanguageTypeUnknown; std::optional<uint64_t> byte_size; + std::optional<uint64_t> data_bit_size; std::optional<uint64_t> alignment; size_t calling_convention = llvm::dwarf::DW_CC_normal; uint32_t bit_stride = 0; diff --git a/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py b/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py index eb121ecbfdba..a985ebbced71 100644 --- a/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py +++ b/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py @@ -97,6 +97,9 @@ class RegisterCommandsTestCase(TestBase): @skipIf(oslist=no_match(["linux"])) def test_aarch64_dynamic_regset_config(self): """Test AArch64 Dynamic Register sets configuration.""" + if not self.isAArch64SVE(): + self.skipTest("SVE must be present") + register_sets = self.setup_register_config_test() for registerSet in register_sets: @@ -259,6 +262,8 @@ class RegisterCommandsTestCase(TestBase): def test_aarch64_dynamic_regset_config_sme_write_za_to_enable(self): """Test that ZA and ZT0 (if present) shows as 0s when disabled and can be enabled by writing to ZA.""" + if not self.isAArch64SVE(): + self.skipTest("SVE must be present.") if not self.isAArch64SME(): self.skipTest("SME must be present.") @@ -270,6 +275,8 @@ class RegisterCommandsTestCase(TestBase): def test_aarch64_dynamic_regset_config_sme_write_zt0_to_enable(self): """Test that ZA and ZT0 (if present) shows as 0s when disabled and can be enabled by writing to ZT0.""" + if not self.isAArch64SVE(): + self.skipTest("SVE must be present.") if not self.isAArch64SME(): self.skipTest("SME must be present.") if not self.isAArch64SME2(): diff --git a/lldb/test/API/functionalities/breakpoint/same_cu_name/Makefile b/lldb/test/API/functionalities/breakpoint/same_cu_name/Makefile index b19e7818601e..b508da24c682 100644 --- a/lldb/test/API/functionalities/breakpoint/same_cu_name/Makefile +++ b/lldb/test/API/functionalities/breakpoint/same_cu_name/Makefile @@ -4,16 +4,16 @@ LD_EXTRAS := ns1.o ns2.o ns3.o ns4.o a.out: main.o ns1.o ns2.o ns3.o ns4.o ns1.o: common.cpp - $(CC) -gdwarf -c -DNAMESPACE=ns1 -o $@ $< + $(CXX) -gdwarf -c -DNAMESPACE=ns1 -o $@ $< ns2.o: common.cpp - $(CC) -gdwarf -c -DNAMESPACE=ns2 -o $@ $< + $(CXX) -gdwarf -c -DNAMESPACE=ns2 -o $@ $< ns3.o: common.cpp - $(CC) -gdwarf -c -DNAMESPACE=ns3 -o $@ $< + $(CXX) -gdwarf -c -DNAMESPACE=ns3 -o $@ $< ns4.o: common.cpp - $(CC) -gdwarf -c -DNAMESPACE=ns4 -o $@ $< + $(CXX) -gdwarf -c -DNAMESPACE=ns4 -o $@ $< include Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/optional/TestDataFormatterLibcxxOptionalSimulator.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/optional/TestDataFormatterLibcxxOptionalSimulator.py index 3fefe87dcad9..7463f8897901 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/optional/TestDataFormatterLibcxxOptionalSimulator.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/optional/TestDataFormatterLibcxxOptionalSimulator.py @@ -53,6 +53,8 @@ for r in range(2): # causing this test to fail. This was reverted in newer version of clang # with commit 52a9ba7ca. @skipIf(compiler="clang", compiler_version=["=", "17"]) + @skipIf(compiler="clang", compiler_version=["=", "18"]) + @skipIf(compiler="clang", compiler_version=["=", "19"]) @functools.wraps(LibcxxOptionalDataFormatterSimulatorTestCase._run_test) def test_method(self, defines=defines): LibcxxOptionalDataFormatterSimulatorTestCase._run_test(self, defines) diff --git a/lldb/test/API/lang/cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py b/lldb/test/API/lang/cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py index d8a729b322fe..2f942da604ff 100644 --- a/lldb/test/API/lang/cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py +++ b/lldb/test/API/lang/cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py @@ -9,7 +9,7 @@ class LibCxxInternalsRecognizerTestCase(TestBase): NO_DEBUG_INFO_TESTCASE = True @add_test_categories(["libc++"]) - @skipIf(compiler="clang", compiler_version=["<", "19.0"]) + @skipIf(compiler="clang", compiler_version=["<=", "19.0"]) def test_frame_recognizer(self): """Test that implementation details of libc++ are hidden""" self.build() diff --git a/lldb/test/API/lua_api/TestThreadAPI.lua b/lldb/test/API/lua_api/TestThreadAPI.lua new file mode 100644 index 000000000000..5a38d0ba9192 --- /dev/null +++ b/lldb/test/API/lua_api/TestThreadAPI.lua @@ -0,0 +1,25 @@ +_T = require('lua_lldb_test').create_test('TestThreadAPI') + +function _T:TestGetStopDescription() + local target = self:create_target() + local breakpoint = target:BreakpointCreateByName("main", "a.out") + assertTrue(breakpoint:IsValid() and breakpoint:GetNumLocations() == 1) + + local process = target:LaunchSimple({ 'arg1', 'arg2' }, nil, nil) + local thread = get_stopped_thread(process, lldb.eStopReasonBreakpoint) + assertNotNil(thread) + assertTrue(thread:IsValid()) + + assertEqual("breakpoint", thread:GetStopDescription(string.len("breakpoint") + 1)) + assertEqual("break", thread:GetStopDescription(string.len("break") + 1)) + assertEqual("b", thread:GetStopDescription(string.len("b") + 1)) + assertEqual("breakpoint 1.1", thread:GetStopDescription(string.len("breakpoint 1.1") + 100)) + + -- Test stream variation + local stream = lldb.SBStream() + assertTrue(thread:GetStopDescription(stream)) + assertNotNil(stream) + assertEqual("breakpoint 1.1", stream:GetData()) +end + +os.exit(_T:run()) diff --git a/lldb/test/API/python_api/default-constructor/sb_thread.py b/lldb/test/API/python_api/default-constructor/sb_thread.py index 34eb3db852c3..4252fa0321ff 100644 --- a/lldb/test/API/python_api/default-constructor/sb_thread.py +++ b/lldb/test/API/python_api/default-constructor/sb_thread.py @@ -10,6 +10,7 @@ def fuzz_obj(obj): obj.GetStopReasonDataCount() obj.GetStopReasonDataAtIndex(100) obj.GetStopDescription(256) + obj.GetStopDescription(lldb.SBStream()) obj.GetThreadID() obj.GetIndexID() obj.GetName() diff --git a/lldb/test/API/python_api/thread/TestThreadAPI.py b/lldb/test/API/python_api/thread/TestThreadAPI.py index 5583434a742a..acad7583eec1 100644 --- a/lldb/test/API/python_api/thread/TestThreadAPI.py +++ b/lldb/test/API/python_api/thread/TestThreadAPI.py @@ -138,6 +138,11 @@ class ThreadAPITestCase(TestBase): "breakpoint 1.1", thread.GetStopDescription(len("breakpoint 1.1") + 100) ) + # Test the stream variation + stream = lldb.SBStream() + self.assertTrue(thread.GetStopDescription(stream)) + self.assertEqual("breakpoint 1.1", stream.GetData()) + def step_out_of_malloc_into_function_b(self, exe_name): """Test Python SBThread.StepOut() API to step out of a malloc call where the call site is at function b().""" exe = self.getBuildArtifact(exe_name) diff --git a/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py b/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py index beab4d6c1f5a..7b78541fb4f8 100644 --- a/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py +++ b/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py @@ -81,24 +81,20 @@ class TestDAP_breakpointEvents(lldbdap_testcase.DAPTestCaseBase): breakpoint["verified"], "expect foo breakpoint to not be verified" ) - # Flush the breakpoint events. - self.dap_server.wait_for_breakpoint_events() - # Continue to the breakpoint - self.continue_to_breakpoints(dap_breakpoint_ids) + self.continue_to_breakpoint(foo_bp_id) + self.continue_to_next_stop() # foo_bp2 + self.continue_to_breakpoint(main_bp_id) + self.continue_to_exit() - verified_breakpoint_ids = [] - unverified_breakpoint_ids = [] - for breakpoint_event in self.dap_server.wait_for_breakpoint_events(): - breakpoint = breakpoint_event["body"]["breakpoint"] - id = breakpoint["id"] - if breakpoint["verified"]: - verified_breakpoint_ids.append(id) - else: - unverified_breakpoint_ids.append(id) + bp_events = [e for e in self.dap_server.events if e["event"] == "breakpoint"] - self.assertIn(main_bp_id, unverified_breakpoint_ids) - self.assertIn(foo_bp_id, unverified_breakpoint_ids) + main_bp_events = [ + e for e in bp_events if e["body"]["breakpoint"]["id"] == main_bp_id + ] + foo_bp_events = [ + e for e in bp_events if e["body"]["breakpoint"]["id"] == foo_bp_id + ] - self.assertIn(main_bp_id, verified_breakpoint_ids) - self.assertIn(foo_bp_id, verified_breakpoint_ids) + self.assertTrue(main_bp_events) + self.assertTrue(foo_bp_events) diff --git a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py index ca881f1d817c..09b13223e0a7 100644 --- a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py +++ b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py @@ -156,6 +156,7 @@ class TestDAP_launch(lldbdap_testcase.DAPTestCaseBase): self.build_and_launch( program, debuggerRoot=program_parent_dir, initCommands=commands ) + self.continue_to_exit() output = self.get_console() self.assertTrue(output and len(output) > 0, "expect console output") lines = output.splitlines() @@ -171,7 +172,6 @@ class TestDAP_launch(lldbdap_testcase.DAPTestCaseBase): % (program_parent_dir, line[len(prefix) :]), ) self.assertTrue(found, "verified lldb-dap working directory") - self.continue_to_exit() def test_sourcePath(self): """ diff --git a/lldb/test/API/tools/lldb-dap/module-event/TestDAP_module_event.py b/lldb/test/API/tools/lldb-dap/module-event/TestDAP_module_event.py index 1f4afabbd161..9d1d17b704f7 100644 --- a/lldb/test/API/tools/lldb-dap/module-event/TestDAP_module_event.py +++ b/lldb/test/API/tools/lldb-dap/module-event/TestDAP_module_event.py @@ -1,58 +1,58 @@ -import dap_server +""" +Test 'module' events for dynamically loaded libraries. +""" + from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * -from lldbsuite.test import lldbutil import lldbdap_testcase -import re class TestDAP_module_event(lldbdap_testcase.DAPTestCaseBase): + def lookup_module_id(self, name): + """Returns the identifier for the first module event starting with the given name.""" + for event in self.dap_server.module_events: + if self.get_dict_value(event, ["body", "module", "name"]).startswith(name): + return self.get_dict_value(event, ["body", "module", "id"]) + self.fail(f"No module events matching name={name}") + + def module_events(self, id): + """Finds all module events by identifier.""" + return [ + event + for event in self.dap_server.module_events + if self.get_dict_value(event, ["body", "module", "id"]) == id + ] + + def module_reasons(self, events): + """Returns the list of 'reason' values from the given events.""" + return [event["body"]["reason"] for event in events] + @skipIfWindows def test_module_event(self): + """ + Test that module events are fired on target load and when the list of + dynamic libraries updates while running. + """ program = self.getBuildArtifact("a.out") self.build_and_launch(program) + # We can analyze the order of events after the process exits. + self.continue_to_exit() - source = "main.cpp" - breakpoint1_line = line_number(source, "// breakpoint 1") - breakpoint2_line = line_number(source, "// breakpoint 2") - breakpoint3_line = line_number(source, "// breakpoint 3") + a_out_id = self.lookup_module_id("a.out") + a_out_events = self.module_events(id=a_out_id) - breakpoint_ids = self.set_source_breakpoints( - source, [breakpoint1_line, breakpoint2_line, breakpoint3_line] + self.assertIn( + "new", + self.module_reasons(a_out_events), + "Expected a.out to load during the debug session.", ) - self.continue_to_breakpoints(breakpoint_ids) - - # We're now stopped at breakpoint 1 before the dlopen. Flush all the module events. - event = self.dap_server.wait_for_event(["module"]) - while event is not None: - event = self.dap_server.wait_for_event(["module"]) - - # Continue to the second breakpoint, before the dlclose. - self.continue_to_breakpoints(breakpoint_ids) - - # Make sure we got a module event for libother. - event = self.dap_server.wait_for_event(["module"]) - self.assertIsNotNone(event, "didn't get a module event") - module_name = event["body"]["module"]["name"] - module_id = event["body"]["module"]["id"] - self.assertEqual(event["body"]["reason"], "new") - self.assertIn("libother", module_name) - - # Continue to the third breakpoint, after the dlclose. - self.continue_to_breakpoints(breakpoint_ids) - - # Make sure we got a module event for libother. - event = self.dap_server.wait_for_event(["module"]) - self.assertIsNotNone(event, "didn't get a module event") - reason = event["body"]["reason"] - self.assertEqual(reason, "removed") - self.assertEqual(event["body"]["module"]["id"], module_id) - - # The removed module event should omit everything but the module id and name - # as they are required fields. - module_data = event["body"]["module"] - required_keys = ["id", "name"] - self.assertListEqual(list(module_data.keys()), required_keys) - self.assertEqual(module_data["name"], "", "expects empty name.") - self.continue_to_exit() + libother_id = self.lookup_module_id( + "libother." # libother.so or libother.dylib based on OS. + ) + libother_events = self.module_events(id=libother_id) + self.assertEqual( + self.module_reasons(libother_events), + ["new", "removed"], + "Expected libother to be loaded then unloaded during the debug session.", + ) diff --git a/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py b/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py index 0ed53dac5d86..2d00c512721c 100644 --- a/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py +++ b/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py @@ -64,19 +64,18 @@ class TestDAP_module(lldbdap_testcase.DAPTestCaseBase): self.assertEqual(program, program_module["path"]) self.assertIn("addressRange", program_module) + self.continue_to_exit() + # Collect all the module names we saw as events. module_new_names = [] module_changed_names = [] - module_event = self.dap_server.wait_for_event(["module"]) - while module_event is not None: + for module_event in self.dap_server.module_events: reason = module_event["body"]["reason"] if reason == "new": module_new_names.append(module_event["body"]["module"]["name"]) elif reason == "changed": module_changed_names.append(module_event["body"]["module"]["name"]) - module_event = self.dap_server.wait_for_event(["module"]) - # Make sure we got an event for every active module. self.assertNotEqual(len(module_new_names), 0) for module in active_modules: @@ -86,7 +85,6 @@ class TestDAP_module(lldbdap_testcase.DAPTestCaseBase): # symbols got added. self.assertNotEqual(len(module_changed_names), 0) self.assertIn(program_module["name"], module_changed_names) - self.continue_to_exit() @skipIfWindows def test_modules(self): diff --git a/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py index e1ad1425a993..fa62ec243f5c 100644 --- a/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py +++ b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py @@ -30,7 +30,11 @@ class TestDAP_restart_console(lldbdap_testcase.DAPTestCaseBase): if reason == "entry": seen_stopped_event += 1 - self.assertEqual(seen_stopped_event, 1, "expect only one stopped entry event.") + self.assertEqual( + seen_stopped_event, + 1, + f"expect only one stopped entry event in {stopped_events}", + ) @skipIfAsan @skipIfWindows @@ -92,11 +96,13 @@ class TestDAP_restart_console(lldbdap_testcase.DAPTestCaseBase): self.build_and_launch(program, console="integratedTerminal", stopOnEntry=True) [bp_main] = self.set_function_breakpoints(["main"]) - self.dap_server.request_continue() # sends configuration done - stopped_events = self.dap_server.wait_for_stopped() + self.dap_server.request_configurationDone() + stopped_threads = list(self.dap_server.thread_stop_reasons.values()) # We should be stopped at the entry point. - self.assertGreaterEqual(len(stopped_events), 0, "expect stopped events") - self.verify_stopped_on_entry(stopped_events) + self.assertEqual( + len(stopped_threads), 1, "Expected the main thread to be stopped on entry." + ) + self.assertEqual(stopped_threads[0]["reason"], "entry") # Then, if we continue, we should hit the breakpoint at main. self.dap_server.request_continue() @@ -105,8 +111,12 @@ class TestDAP_restart_console(lldbdap_testcase.DAPTestCaseBase): # Restart and check that we still get a stopped event before reaching # main. self.dap_server.request_restart() - stopped_events = self.dap_server.wait_for_stopped() - self.verify_stopped_on_entry(stopped_events) + stopped_threads = list(self.dap_server.thread_stop_reasons.values()) + # We should be stopped at the entry point. + self.assertEqual( + len(stopped_threads), 1, "Expected the main thread to be stopped on entry." + ) + self.assertEqual(stopped_threads[0]["reason"], "entry") # continue to main self.dap_server.request_continue() diff --git a/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py b/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py index a01845669666..018402058917 100644 --- a/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py +++ b/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py @@ -32,7 +32,7 @@ class TestDAP_sendEvent(lldbdap_testcase.DAPTestCaseBase): ], ) self.set_source_breakpoints(source, [breakpoint_line]) - self.continue_to_next_stop() + self.do_continue() custom_event = self.dap_server.wait_for_event( filter=["my-custom-event-no-body"] diff --git a/lldb/test/Shell/Breakpoint/jit-loader_jitlink_elf.test b/lldb/test/Shell/Breakpoint/jit-loader_jitlink_elf.test index 52c86fa5530b..9a972f1f1ece 100644 --- a/lldb/test/Shell/Breakpoint/jit-loader_jitlink_elf.test +++ b/lldb/test/Shell/Breakpoint/jit-loader_jitlink_elf.test @@ -3,8 +3,8 @@ # JITLink is the Orc-specific JIT linker implementation. # -# RUN: %clang -g -S -emit-llvm -fPIC --target=x86_64-unknown-unknown-elf \ -# RUN: -o %t.ll %p/Inputs/jitbp.cpp +# RUN: %clangxx -g -S -emit-llvm -fPIC --target=x86_64-unknown-unknown-elf \ +# RUN: -o %t.ll %p/Inputs/jitbp.cpp # RUN: %lldb -b -o 'settings set plugin.jit-loader.gdb.enable on' -o 'b jitbp' \ # RUN: -o 'run --jit-linker=jitlink %t.ll' lli | FileCheck %s diff --git a/lldb/test/Shell/Breakpoint/jit-loader_rtdyld_elf.test b/lldb/test/Shell/Breakpoint/jit-loader_rtdyld_elf.test index b34a5673936f..ae9402a51949 100644 --- a/lldb/test/Shell/Breakpoint/jit-loader_rtdyld_elf.test +++ b/lldb/test/Shell/Breakpoint/jit-loader_rtdyld_elf.test @@ -3,8 +3,8 @@ # RuntimeDyld can be used to link and load emitted code for both, MCJIT and Orc. # -# RUN: %clang -g -S -emit-llvm --target=x86_64-unknown-unknown-elf \ -# RUN: -o %t.ll %p/Inputs/jitbp.cpp +# RUN: %clangxx -g -S -emit-llvm --target=x86_64-unknown-unknown-elf \ +# RUN: -o %t.ll %p/Inputs/jitbp.cpp # # RUN: %lldb -b -o 'settings set plugin.jit-loader.gdb.enable on' -o 'b jitbp' \ # RUN: -o 'run --jit-kind=mcjit %t.ll' lli | FileCheck %s diff --git a/lldb/test/Shell/Commands/command-image-dump-ast-colored.test b/lldb/test/Shell/Commands/command-image-dump-ast-colored.test index 355ef6bb1d19..7fd70d234fbd 100644 --- a/lldb/test/Shell/Commands/command-image-dump-ast-colored.test +++ b/lldb/test/Shell/Commands/command-image-dump-ast-colored.test @@ -1,7 +1,7 @@ # Test AST dumping with and without color. # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Commands/command-image-dump-ast.test b/lldb/test/Shell/Commands/command-image-dump-ast.test index 3204022418cb..86fe1836a2c6 100644 --- a/lldb/test/Shell/Commands/command-image-dump-ast.test +++ b/lldb/test/Shell/Commands/command-image-dump-ast.test @@ -5,7 +5,7 @@ # UNSUPPORTED: system-windows # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Commands/list-header.test b/lldb/test/Shell/Commands/list-header.test index 53c4b786f181..27eaa1a4f29c 100644 --- a/lldb/test/Shell/Commands/list-header.test +++ b/lldb/test/Shell/Commands/list-header.test @@ -3,11 +3,11 @@ # XFAIL: target-windows ## Test that `list header.h:<line>` works correctly when header is available. -## +## # RUN: split-file %s %t -# RUN: %clang_host -g %t/main_with_inlined.cc %t/foo.cc -o %t/main_with_inlined.out -# RUN: %clang_host -g %t/main_no_inlined.cc %t/foo.cc -o %t/main_no_inlined.out +# RUN: %clangxx_host -g %t/main_with_inlined.cc %t/foo.cc -o %t/main_with_inlined.out +# RUN: %clangxx_host -g %t/main_no_inlined.cc %t/foo.cc -o %t/main_no_inlined.out # RUN: %lldb %t/main_with_inlined.out -o "list foo.h:2" -o "exit" 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-INLINED @@ -19,7 +19,7 @@ # CHECK-INLINED: 2 extern int* ptr; # CHECK-INLINED: 3 void f(int x); -# CHECK-INLINED: 4 +# CHECK-INLINED: 4 # CHECK-INLINED: 5 inline void g(int x) { # CHECK-INLINED: 6 *ptr = x; // should crash here # CHECK-INLINED: 7 } diff --git a/lldb/test/Shell/Error/cleanup.cpp b/lldb/test/Shell/Error/cleanup.cpp index 6abc62dc4af9..1e83478a8333 100644 --- a/lldb/test/Shell/Error/cleanup.cpp +++ b/lldb/test/Shell/Error/cleanup.cpp @@ -1,5 +1,5 @@ // Test CommandObject is cleaned up even after commands fail due to not taking any argument. -// RUN: %clang_host -g %s -o %t +// RUN: %clangxx_host -g %s -o %t // RUN: %lldb -f %t -o "settings set interpreter.stop-command-source-on-error false" -s \ // RUN: %S/Inputs/cleanup.lldbinit int main() { return 0; } diff --git a/lldb/test/Shell/Expr/TestExprLanguageNote.test b/lldb/test/Shell/Expr/TestExprLanguageNote.test index e8e4e1399e45..e7da30816319 100644 --- a/lldb/test/Shell/Expr/TestExprLanguageNote.test +++ b/lldb/test/Shell/Expr/TestExprLanguageNote.test @@ -1,5 +1,5 @@ # RUN: split-file %s %t -# RUN: %clang_host -g %t/main.cpp -o %t.out +# RUN: %clangxx_host -g %t/main.cpp -o %t.out # # RUN: %lldb -x -b -o "settings set interpreter.stop-command-source-on-error false" \ # RUN: -s %t/no-target.input 2>&1 | FileCheck %s --check-prefix=CHECK-NO-TARGET diff --git a/lldb/test/Shell/Expr/TestLambdaExprImport.test b/lldb/test/Shell/Expr/TestLambdaExprImport.test index c57ce06453fe..b49a38036e56 100644 --- a/lldb/test/Shell/Expr/TestLambdaExprImport.test +++ b/lldb/test/Shell/Expr/TestLambdaExprImport.test @@ -3,7 +3,7 @@ # uses always). # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -o "settings set interpreter.stop-command-source-on-error false" \ # RUN: -x -b -s %t/commands.input %t.out 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/ObjectFile/ELF/elf-memory.test b/lldb/test/Shell/ObjectFile/ELF/elf-memory.test index 75a68edd2d34..170dc7682aab 100644 --- a/lldb/test/Shell/ObjectFile/ELF/elf-memory.test +++ b/lldb/test/Shell/ObjectFile/ELF/elf-memory.test @@ -11,7 +11,7 @@ // - verify that "image dump objfile" will dump the dynamic section of the // memory elf file and find the .dynamic string table. -// RUN: %clang_host %p/Inputs/memory-elf.cpp -g -O0 -o %t +// RUN: %clangxx_host %p/Inputs/memory-elf.cpp -g -O0 -o %t // RUN: %lldb %t -b \ // RUN: -o "b main" \ diff --git a/lldb/test/Shell/ObjectFile/MachO/Inputs/section-overflow-binary b/lldb/test/Shell/ObjectFile/MachO/Inputs/section-overflow-binary Binary files differnew file mode 100644 index 000000000000..19dc2f4ac9ff --- /dev/null +++ b/lldb/test/Shell/ObjectFile/MachO/Inputs/section-overflow-binary diff --git a/lldb/test/Shell/ObjectFile/MachO/section-overflow-binary.test b/lldb/test/Shell/ObjectFile/MachO/section-overflow-binary.test new file mode 100644 index 000000000000..76c335f65a76 --- /dev/null +++ b/lldb/test/Shell/ObjectFile/MachO/section-overflow-binary.test @@ -0,0 +1,13 @@ +RUN: %lldb -b %p/Inputs/section-overflow-binary \ +RUN: -o 'script dwarf = lldb.target.module[0].sections[0]' \ +RUN: -o 'script section = dwarf.GetSubSectionAtIndex(0)' \ +RUN: -o "script print(f'{section.GetName()} file_offset=0x{section.GetFileOffset():016x}')" \ +RUN: -o 'script section = dwarf.GetSubSectionAtIndex(1)' \ +RUN: -o "script print(f'{section.GetName()} file_offset=0x{section.GetFileOffset():016x}')" \ +RUN: -o 'script section = dwarf.GetSubSectionAtIndex(2)' \ +RUN: -o "script print(f'{section.GetName()} file_offset=0x{section.GetFileOffset():016x}')" \ +RUN: | FileCheck %s + +CHECK: __debug_abbrev file_offset=0x00000000fffffff0 +CHECK: __debug_info file_offset=0x0000000100000010 +CHECK: __debug_line file_offset=0x0000000300000010 diff --git a/lldb/test/Shell/Recognizer/verbose_trap-in-stl-callback-user-leaf.test b/lldb/test/Shell/Recognizer/verbose_trap-in-stl-callback-user-leaf.test index 5a84c163453c..32b4095d9add 100644 --- a/lldb/test/Shell/Recognizer/verbose_trap-in-stl-callback-user-leaf.test +++ b/lldb/test/Shell/Recognizer/verbose_trap-in-stl-callback-user-leaf.test @@ -12,7 +12,7 @@ # UNSUPPORTED: system-windows # -# RUN: %clang_host -g -O0 %S/Inputs/verbose_trap-in-stl-callback-user-leaf.cpp -o %t.out +# RUN: %clangxx_host -g -O0 %S/Inputs/verbose_trap-in-stl-callback-user-leaf.cpp -o %t.out # RUN: %lldb -b -s %s %t.out | FileCheck %s --check-prefixes=CHECK run diff --git a/lldb/test/Shell/Recognizer/verbose_trap-in-stl-callback.test b/lldb/test/Shell/Recognizer/verbose_trap-in-stl-callback.test index b15bcb3a384f..c8c433c0a819 100644 --- a/lldb/test/Shell/Recognizer/verbose_trap-in-stl-callback.test +++ b/lldb/test/Shell/Recognizer/verbose_trap-in-stl-callback.test @@ -11,7 +11,7 @@ # UNSUPPORTED: system-windows # -# RUN: %clang_host -g -O0 %S/Inputs/verbose_trap-in-stl-callback.cpp -o %t.out +# RUN: %clangxx_host -g -O0 %S/Inputs/verbose_trap-in-stl-callback.cpp -o %t.out # RUN: %lldb -b -s %s %t.out | FileCheck %s --check-prefixes=CHECK run diff --git a/lldb/test/Shell/Recognizer/verbose_trap-in-stl-max-depth.test b/lldb/test/Shell/Recognizer/verbose_trap-in-stl-max-depth.test index 2ea6594643c9..d0789ac7dc67 100644 --- a/lldb/test/Shell/Recognizer/verbose_trap-in-stl-max-depth.test +++ b/lldb/test/Shell/Recognizer/verbose_trap-in-stl-max-depth.test @@ -4,7 +4,7 @@ # UNSUPPORTED: system-windows # -# RUN: %clang_host -g -O0 %S/Inputs/verbose_trap-in-stl-max-depth.cpp -o %t.out +# RUN: %clangxx_host -g -O0 %S/Inputs/verbose_trap-in-stl-max-depth.cpp -o %t.out # RUN: %lldb -b -s %s %t.out | FileCheck %s --check-prefixes=CHECK run diff --git a/lldb/test/Shell/Recognizer/verbose_trap-in-stl-nested.test b/lldb/test/Shell/Recognizer/verbose_trap-in-stl-nested.test index 81a492d1ed57..68a4ea612c0d 100644 --- a/lldb/test/Shell/Recognizer/verbose_trap-in-stl-nested.test +++ b/lldb/test/Shell/Recognizer/verbose_trap-in-stl-nested.test @@ -3,7 +3,7 @@ # UNSUPPORTED: system-windows # -# RUN: %clang_host -g -O0 %S/Inputs/verbose_trap-in-stl-nested.cpp -o %t.out +# RUN: %clangxx_host -g -O0 %S/Inputs/verbose_trap-in-stl-nested.cpp -o %t.out # RUN: %lldb -b -s %s %t.out | FileCheck %s --check-prefixes=CHECK run diff --git a/lldb/test/Shell/Recognizer/verbose_trap-in-stl.test b/lldb/test/Shell/Recognizer/verbose_trap-in-stl.test index dd08290174e3..bd4851146b40 100644 --- a/lldb/test/Shell/Recognizer/verbose_trap-in-stl.test +++ b/lldb/test/Shell/Recognizer/verbose_trap-in-stl.test @@ -3,7 +3,7 @@ # UNSUPPORTED: system-windows # -# RUN: %clang_host -g -O0 %S/Inputs/verbose_trap-in-stl.cpp -o %t.out +# RUN: %clangxx_host -g -O0 %S/Inputs/verbose_trap-in-stl.cpp -o %t.out # RUN: %lldb -b -s %s %t.out | FileCheck %s --check-prefixes=CHECK run diff --git a/lldb/test/Shell/Recognizer/verbose_trap.test b/lldb/test/Shell/Recognizer/verbose_trap.test index dafab7bdea68..ab0df082cc03 100644 --- a/lldb/test/Shell/Recognizer/verbose_trap.test +++ b/lldb/test/Shell/Recognizer/verbose_trap.test @@ -1,15 +1,15 @@ # UNSUPPORTED: system-windows # -# RUN: %clang_host -g -O0 %S/Inputs/verbose_trap.cpp -o %t.out -DVERBOSE_TRAP_TEST_CATEGORY=\"Foo\" -DVERBOSE_TRAP_TEST_MESSAGE=\"Bar\" +# RUN: %clangxx_host -g -O0 %S/Inputs/verbose_trap.cpp -o %t.out -DVERBOSE_TRAP_TEST_CATEGORY=\"Foo\" -DVERBOSE_TRAP_TEST_MESSAGE=\"Bar\" # RUN: %lldb -b -s %s %t.out | FileCheck %s --check-prefixes=CHECK,CHECK-BOTH # -# RUN: %clang_host -g -O0 %S/Inputs/verbose_trap.cpp -o %t.out -DVERBOSE_TRAP_TEST_CATEGORY=\"\" -DVERBOSE_TRAP_TEST_MESSAGE=\"Bar\" +# RUN: %clangxx_host -g -O0 %S/Inputs/verbose_trap.cpp -o %t.out -DVERBOSE_TRAP_TEST_CATEGORY=\"\" -DVERBOSE_TRAP_TEST_MESSAGE=\"Bar\" # RUN: %lldb -b -s %s %t.out | FileCheck %s --check-prefixes=CHECK,CHECK-MESSAGE_ONLY # -# RUN: %clang_host -g -O0 %S/Inputs/verbose_trap.cpp -o %t.out -DVERBOSE_TRAP_TEST_CATEGORY=\"Foo\" -DVERBOSE_TRAP_TEST_MESSAGE=\"\" +# RUN: %clangxx_host -g -O0 %S/Inputs/verbose_trap.cpp -o %t.out -DVERBOSE_TRAP_TEST_CATEGORY=\"Foo\" -DVERBOSE_TRAP_TEST_MESSAGE=\"\" # RUN: %lldb -b -s %s %t.out | FileCheck %s --check-prefixes=CHECK,CHECK-CATEGORY_ONLY # -# RUN: %clang_host -g -O0 %S/Inputs/verbose_trap.cpp -o %t.out -DVERBOSE_TRAP_TEST_CATEGORY=\"\" -DVERBOSE_TRAP_TEST_MESSAGE=\"\" +# RUN: %clangxx_host -g -O0 %S/Inputs/verbose_trap.cpp -o %t.out -DVERBOSE_TRAP_TEST_CATEGORY=\"\" -DVERBOSE_TRAP_TEST_MESSAGE=\"\" # RUN: %lldb -b -s %s %t.out | FileCheck %s --check-prefixes=CHECK,CHECK-NONE run diff --git a/lldb/test/Shell/Register/Inputs/x86-multithread-read.cpp b/lldb/test/Shell/Register/Inputs/x86-multithread-read.cpp index c5f571fc1d2c..0d2869c0c577 100644 --- a/lldb/test/Shell/Register/Inputs/x86-multithread-read.cpp +++ b/lldb/test/Shell/Register/Inputs/x86-multithread-read.cpp @@ -1,4 +1,5 @@ #include <cstdint> +#include <functional> #include <mutex> #include <thread> diff --git a/lldb/test/Shell/Register/Inputs/x86-multithread-write.cpp b/lldb/test/Shell/Register/Inputs/x86-multithread-write.cpp index 320f9e938e5b..1f4e91acc4c0 100644 --- a/lldb/test/Shell/Register/Inputs/x86-multithread-write.cpp +++ b/lldb/test/Shell/Register/Inputs/x86-multithread-write.cpp @@ -1,6 +1,7 @@ #include <cinttypes> #include <cstdint> #include <cstdio> +#include <functional> #include <mutex> #include <thread> diff --git a/lldb/test/Shell/Settings/TestChildCountTruncation.test b/lldb/test/Shell/Settings/TestChildCountTruncation.test index da6436cb5ca2..b66d0df98306 100644 --- a/lldb/test/Shell/Settings/TestChildCountTruncation.test +++ b/lldb/test/Shell/Settings/TestChildCountTruncation.test @@ -2,7 +2,7 @@ # when target.max-children-count wasn't explicitly set. # RUN: split-file %s %t -# RUN: %clang_host -g %t/main.cpp -o %t.out +# RUN: %clangxx_host -g %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/dwim-commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s --check-prefix=DWIM # diff --git a/lldb/test/Shell/Settings/TestChildDepthTruncation.test b/lldb/test/Shell/Settings/TestChildDepthTruncation.test index 12f5661600ae..7e4fbbef9e45 100644 --- a/lldb/test/Shell/Settings/TestChildDepthTruncation.test +++ b/lldb/test/Shell/Settings/TestChildDepthTruncation.test @@ -2,7 +2,7 @@ # when target.max-children-depth wasn't explicitly set. # RUN: split-file %s %t -# RUN: %clang_host -g %t/main.cpp -o %t.out +# RUN: %clangxx_host -g %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/dwim-commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s --check-prefix=DWIM # diff --git a/lldb/test/Shell/Settings/TestCxxFrameFormat.test b/lldb/test/Shell/Settings/TestCxxFrameFormat.test index d70db582e975..3ee92d53492f 100644 --- a/lldb/test/Shell/Settings/TestCxxFrameFormat.test +++ b/lldb/test/Shell/Settings/TestCxxFrameFormat.test @@ -3,7 +3,7 @@ # Test the plugin.cplusplus.display.function-name-format setting. # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Settings/TestCxxFrameFormatEmpty.test b/lldb/test/Shell/Settings/TestCxxFrameFormatEmpty.test index 0a6d2723ded3..a0550b733d78 100644 --- a/lldb/test/Shell/Settings/TestCxxFrameFormatEmpty.test +++ b/lldb/test/Shell/Settings/TestCxxFrameFormatEmpty.test @@ -5,7 +5,7 @@ # ${function.name-with-args}. # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Settings/TestCxxFrameFormatMixedLanguages.test b/lldb/test/Shell/Settings/TestCxxFrameFormatMixedLanguages.test index bafd36f5ae17..679d6e4d5abe 100644 --- a/lldb/test/Shell/Settings/TestCxxFrameFormatMixedLanguages.test +++ b/lldb/test/Shell/Settings/TestCxxFrameFormatMixedLanguages.test @@ -4,9 +4,9 @@ # when interoperating multiple languages. # RUN: split-file %s %t -# RUN: %clangxx_host -x c -c -g %t/lib.c -o %t.clib.o +# RUN: %clang_host -x c -c -g %t/lib.c -o %t.clib.o # RUN: %clangxx_host -c -g %t/lib.cpp -o %t.cxxlib.o -# RUN: %clangxx_host %t/main.m %t.cxxlib.o %t.clib.o -o %t.out +# RUN: %clang_host %t/main.m %t.cxxlib.o %t.clib.o -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 | FileCheck %s #--- lib.c @@ -47,7 +47,7 @@ break set -n method run bt -# CHECK: custom-frame 'this affects C++ only' -# CHECK: custom-frame 'this affects C++ only' -# CHECK: custom-frame 'func' -# CHECK: custom-frame 'main' +# CHECK: custom-frame 'this affects C++ only' +# CHECK: custom-frame 'this affects C++ only' +# CHECK: custom-frame 'func' +# CHECK: custom-frame 'main' diff --git a/lldb/test/Shell/Settings/TestCxxFrameFormatPartialFailure.test b/lldb/test/Shell/Settings/TestCxxFrameFormatPartialFailure.test index e914ff7a010d..f279f07afcda 100644 --- a/lldb/test/Shell/Settings/TestCxxFrameFormatPartialFailure.test +++ b/lldb/test/Shell/Settings/TestCxxFrameFormatPartialFailure.test @@ -5,7 +5,7 @@ # were successful. # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Settings/TestFrameFormatFunctionBasename.test b/lldb/test/Shell/Settings/TestFrameFormatFunctionBasename.test index c0008e50927b..56ec09e2f951 100644 --- a/lldb/test/Shell/Settings/TestFrameFormatFunctionBasename.test +++ b/lldb/test/Shell/Settings/TestFrameFormatFunctionBasename.test @@ -3,11 +3,11 @@ # Test the ${function.basename} frame-format variable. # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s # -# RUN: %clang_host -O0 %t/main.cpp -o %t-nodebug.out +# RUN: %clangxx_host -O0 %t/main.cpp -o %t-nodebug.out # RUN: %lldb -x -b -s %t/commands.input %t-nodebug.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Settings/TestFrameFormatFunctionFormattedArguments.test b/lldb/test/Shell/Settings/TestFrameFormatFunctionFormattedArguments.test index 04f51701a2a2..f20fc8ca77ae 100644 --- a/lldb/test/Shell/Settings/TestFrameFormatFunctionFormattedArguments.test +++ b/lldb/test/Shell/Settings/TestFrameFormatFunctionFormattedArguments.test @@ -3,11 +3,11 @@ # Test the ${function.formatted-arguments} frame-format variable. # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s # -# RUN: %clang_host -O0 %t/main.cpp -o %t-nodebug.out +# RUN: %clangxx_host -O0 %t/main.cpp -o %t-nodebug.out # RUN: %lldb -x -b -s %t/commands.input %t-nodebug.out -o exit 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-NODEBUG diff --git a/lldb/test/Shell/Settings/TestFrameFormatFunctionQualifiers.test b/lldb/test/Shell/Settings/TestFrameFormatFunctionQualifiers.test index b1dfe834c1de..d05e60b0e8d1 100644 --- a/lldb/test/Shell/Settings/TestFrameFormatFunctionQualifiers.test +++ b/lldb/test/Shell/Settings/TestFrameFormatFunctionQualifiers.test @@ -3,11 +3,11 @@ # Test the ${function.qualifiers} frame-format variable. # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s # -# RUN: %clang_host -O0 %t/main.cpp -o %t-nodebug.out +# RUN: %clangxx_host -O0 %t/main.cpp -o %t-nodebug.out # RUN: %lldb -x -b -s %t/commands.input %t-nodebug.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Settings/TestFrameFormatFunctionReturn.test b/lldb/test/Shell/Settings/TestFrameFormatFunctionReturn.test index f913162a1aa6..bb78258aba75 100644 --- a/lldb/test/Shell/Settings/TestFrameFormatFunctionReturn.test +++ b/lldb/test/Shell/Settings/TestFrameFormatFunctionReturn.test @@ -4,11 +4,11 @@ # frame-format variables. # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s # -# RUN: %clang_host -O0 %t/main.cpp -o %t-nodebug.out +# RUN: %clangxx_host -O0 %t/main.cpp -o %t-nodebug.out # RUN: %lldb -x -b -s %t/commands.input %t-nodebug.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Settings/TestFrameFormatFunctionScope.test b/lldb/test/Shell/Settings/TestFrameFormatFunctionScope.test index a28c16f95a9e..f4a17661c360 100644 --- a/lldb/test/Shell/Settings/TestFrameFormatFunctionScope.test +++ b/lldb/test/Shell/Settings/TestFrameFormatFunctionScope.test @@ -3,11 +3,11 @@ # Test the ${function.scope} frame-format variable. # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s # -# RUN: %clang_host -O0 %t/main.cpp -o %t-nodebug.out +# RUN: %clangxx_host -O0 %t/main.cpp -o %t-nodebug.out # RUN: %lldb -x -b -s %t/commands.input %t-nodebug.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Settings/TestFrameFormatFunctionSuffix.test b/lldb/test/Shell/Settings/TestFrameFormatFunctionSuffix.test index 4609a0412a0a..5883c722f333 100644 --- a/lldb/test/Shell/Settings/TestFrameFormatFunctionSuffix.test +++ b/lldb/test/Shell/Settings/TestFrameFormatFunctionSuffix.test @@ -3,7 +3,7 @@ # Test the ${function.suffix} frame-format variable. # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Settings/TestFrameFormatFunctionTemplateArguments.test b/lldb/test/Shell/Settings/TestFrameFormatFunctionTemplateArguments.test index ac8a32820c88..a09a9610f48d 100644 --- a/lldb/test/Shell/Settings/TestFrameFormatFunctionTemplateArguments.test +++ b/lldb/test/Shell/Settings/TestFrameFormatFunctionTemplateArguments.test @@ -3,11 +3,11 @@ # Test the ${function.template-arguments} frame-format variable. # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s # -# RUN: %clang_host -O0 %t/main.cpp -o %t-nodebug.out +# RUN: %clangxx_host -O0 %t/main.cpp -o %t-nodebug.out # RUN: %lldb -x -b -s %t/commands.input %t-nodebug.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Settings/TestFrameFunctionInlined.test b/lldb/test/Shell/Settings/TestFrameFunctionInlined.test index 5db34b416085..1bb7ab486bcf 100644 --- a/lldb/test/Shell/Settings/TestFrameFunctionInlined.test +++ b/lldb/test/Shell/Settings/TestFrameFunctionInlined.test @@ -6,7 +6,7 @@ # REQUIRES: (system-windows && lld) || !system-windows # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out %if system-windows %{-fuse-ld=lld%} +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out %if system-windows %{-fuse-ld=lld%} # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/split-dwarf-expression-eval-bug.cpp b/lldb/test/Shell/SymbolFile/DWARF/split-dwarf-expression-eval-bug.cpp index 4a8004ddd287..b02eea6bbc4f 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/split-dwarf-expression-eval-bug.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/split-dwarf-expression-eval-bug.cpp @@ -7,10 +7,10 @@ // UNSUPPORTED: system-darwin, system-windows -// RUN: %clang_host -c -gsplit-dwarf -g %s -o %t1.o -DONE -// RUN: %clang_host -c -gsplit-dwarf -g %s -o %t2.o -DTWO -// RUN: %clang_host -c -gsplit-dwarf -g %s -o %t3.o -DTHREE -// RUN: %clang_host %t1.o %t2.o %t3.o -o %t +// RUN: %clangxx_host -c -gsplit-dwarf -g %s -o %t1.o -DONE +// RUN: %clangxx_host -c -gsplit-dwarf -g %s -o %t2.o -DTWO +// RUN: %clangxx_host -c -gsplit-dwarf -g %s -o %t3.o -DTHREE +// RUN: %clangxx_host %t1.o %t2.o %t3.o -o %t // RUN: %lldb %t -o "br set -n foo" -o run -o "expression bool_in_first_cu" -o exit \ // RUN: | FileCheck %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/apple-index-is-used.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/apple-index-is-used.cpp index 5bcb2cbcbbe2..8ef2e56ba3d4 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/apple-index-is-used.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/apple-index-is-used.cpp @@ -1,5 +1,5 @@ // Test that we use the apple indexes. -// RUN: %clang %s -g -c -o %t --target=x86_64-apple-macosx -gdwarf-4 +// RUN: %clangxx %s -g -c -o %t --target=x86_64-apple-macosx -gdwarf-4 // RUN: lldb-test symbols %t | FileCheck %s // CHECK: .apple_names index present diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/debug-names-compressed.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/debug-names-compressed.cpp index 4dcbb4715220..53c3d3daa40c 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/debug-names-compressed.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/debug-names-compressed.cpp @@ -3,7 +3,7 @@ // REQUIRES: lld, zlib -// RUN: %clang -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames %s +// RUN: %clangxx -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames %s // RUN: ld.lld %t.o -o %t --compress-debug-sections=zlib // RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix NAMES // RUN: lldb-test symbols --find=variable --name=foo %t | FileCheck %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-debug-names.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-debug-names.cpp index 2b7a928c89a8..acc34dd41688 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-debug-names.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-debug-names.cpp @@ -6,7 +6,7 @@ // REQUIRES: lld -// RUN: %clang %s -target x86_64-pc-linux -gdwarf-5 -fdebug-types-section \ +// RUN: %clangxx %s -target x86_64-pc-linux -gdwarf-5 -fdebug-types-section \ // RUN: -gpubnames -fno-limit-debug-info -c -o %t.o // RUN: ld.lld %t.o -o %t // RUN: %lldb %t -o "type lookup stype" -b | FileCheck %s --check-prefix=BASE diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-dwo-cross-reference.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-dwo-cross-reference.cpp index 0e29cb3e7f16..bc863fb64a9c 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-dwo-cross-reference.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-dwo-cross-reference.cpp @@ -3,9 +3,9 @@ // REQUIRES: lld -// RUN: %clang %s -target x86_64-pc-linux -fno-standalone-debug -g \ +// RUN: %clangxx %s -target x86_64-pc-linux -fno-standalone-debug -g \ // RUN: -fdebug-types-section -gsplit-dwarf -c -o %t1.o -DONE -// RUN: %clang %s -target x86_64-pc-linux -fno-standalone-debug -g \ +// RUN: %clangxx %s -target x86_64-pc-linux -fno-standalone-debug -g \ // RUN: -fdebug-types-section -gsplit-dwarf -c -o %t2.o -DTWO // RUN: llvm-dwarfdump %t1.dwo -debug-types -debug-info | FileCheck --check-prefix=ONEUNIT %s // RUN: llvm-dwarfdump %t2.dwo -debug-types -debug-info | FileCheck --check-prefix=ONEUNIT %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/dwarf5-index-is-used.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/dwarf5-index-is-used.cpp index d6ac23716f6c..2fdb1d8d7ca7 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/dwarf5-index-is-used.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/dwarf5-index-is-used.cpp @@ -2,7 +2,7 @@ // REQUIRES: lld -// RUN: %clang %s -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames +// RUN: %clangxx %s -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames // RUN: ld.lld %t.o -o %t // RUN: lldb-test symbols %t | FileCheck %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/dwarf5-partial-index.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/dwarf5-partial-index.cpp index ab84415f61b2..a739dfde48aa 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/dwarf5-partial-index.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/dwarf5-partial-index.cpp @@ -3,9 +3,9 @@ // REQUIRES: lld -// RUN: %clang %s -c -o %t-1.o --target=x86_64-pc-linux -DONE -gdwarf-5 -gpubnames +// RUN: %clangxx %s -c -o %t-1.o --target=x86_64-pc-linux -DONE -gdwarf-5 -gpubnames // RUN: llvm-readobj --sections %t-1.o | FileCheck %s --check-prefix NAMES -// RUN: %clang %s -c -o %t-2.o --target=x86_64-pc-linux -DTWO -gdwarf-5 -gno-pubnames +// RUN: %clangxx %s -c -o %t-2.o --target=x86_64-pc-linux -DTWO -gdwarf-5 -gno-pubnames // RUN: ld.lld %t-1.o %t-2.o -o %t // RUN: lldb-test symbols --find=variable --name=foo %t | FileCheck %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/dwo-not-found-warning.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/dwo-not-found-warning.cpp index 929e11f80e34..36eb299f0663 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/dwo-not-found-warning.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/dwo-not-found-warning.cpp @@ -1,4 +1,4 @@ -// RUN: %clang --target=x86_64-pc-linux -g -gsplit-dwarf -c %s -o %t.o +// RUN: %clangxx --target=x86_64-pc-linux -g -gsplit-dwarf -c %s -o %t.o // RUN: rm %t.dwo // RUN: %lldb %t.o -o "br set -n main" -o exit 2>&1 | FileCheck %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-foreign-type-units.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-foreign-type-units.cpp index 9251930d7d13..7fbc4f98e797 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-foreign-type-units.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-foreign-type-units.cpp @@ -16,9 +16,9 @@ // type unit comes from by looking at the DW_AT_dwo_name attribute in the // DW_TAG_type_unit. -// RUN: %clang -target x86_64-pc-linux -gdwarf-5 -gsplit-dwarf \ +// RUN: %clangxx -target x86_64-pc-linux -gdwarf-5 -gsplit-dwarf \ // RUN: -fdebug-types-section -gpubnames -c %s -o %t.main.o -// RUN: %clang -target x86_64-pc-linux -gdwarf-5 -gsplit-dwarf -DVARIANT \ +// RUN: %clangxx -target x86_64-pc-linux -gdwarf-5 -gsplit-dwarf -DVARIANT \ // RUN: -fdebug-types-section -gpubnames -c %s -o %t.foo.o // RUN: ld.lld %t.main.o %t.foo.o -o %t diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-index-cache.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-index-cache.cpp index 3e97c3fb1ebc..3edcd8f180a1 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-index-cache.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-index-cache.cpp @@ -14,8 +14,8 @@ // complete DWARF index. // Test that if we don't have .debug_names, that we save a full DWARF index. -// RUN: %clang -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -DMAIN=1 -c %s -o %t.main.o -// RUN: %clang -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -DMAIN=0 -c %s -o %t.foo.o +// RUN: %clangxx -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -DMAIN=1 -c %s -o %t.main.o +// RUN: %clangxx -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -DMAIN=0 -c %s -o %t.foo.o // RUN: ld.lld %t.main.o %t.foo.o -o %t.nonames // RUN: llvm-dwp %t.main.dwo %t.foo.dwo -o %t.nonames.dwp // RUN: rm %t.main.dwo %t.foo.dwo @@ -35,8 +35,8 @@ // Test that if we have one .o file with .debug_names and one without, that we // save a partial DWARF index. -// RUN: %clang -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -DMAIN=1 -c %s -o %t.main.o -gpubnames -// RUN: %clang -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -DMAIN=0 -c %s -o %t.foo.o +// RUN: %clangxx -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -DMAIN=1 -c %s -o %t.main.o -gpubnames +// RUN: %clangxx -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -DMAIN=0 -c %s -o %t.foo.o // RUN: ld.lld %t.main.o %t.foo.o -o %t.somenames // RUN: llvm-dwp %t.main.dwo %t.foo.dwo -o %t.somenames.dwp // RUN: rm %t.main.dwo %t.foo.dwo diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-separate-debug-file.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-separate-debug-file.cpp index 888e96bbb10a..f625fda2087d 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-separate-debug-file.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-separate-debug-file.cpp @@ -1,7 +1,7 @@ // REQUIRES: lld, python // Now test with DWARF5 -// RUN: %clang -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -c %s -o %t.dwarf5.o +// RUN: %clangxx -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -c %s -o %t.dwarf5.o // RUN: ld.lld %t.dwarf5.o -o %t.dwarf5 // RUN: llvm-dwp %t.dwarf5.dwo -o %t.dwarf5.dwp // RUN: rm %t.dwarf5.dwo @@ -64,7 +64,7 @@ // RUN: -b %t.dwarf5.debug 2>&1 | FileCheck %s -check-prefix=NODWP // Now test with DWARF4 -// RUN: %clang -target x86_64-pc-linux -gsplit-dwarf -gdwarf-4 -c %s -o %t.dwarf4.o +// RUN: %clangxx -target x86_64-pc-linux -gsplit-dwarf -gdwarf-4 -c %s -o %t.dwarf4.o // RUN: ld.lld %t.dwarf4.o -o %t.dwarf4 // RUN: llvm-dwp %t.dwarf4.dwo -o %t.dwarf4.dwp // RUN: rm %t.dwarf4.dwo @@ -128,7 +128,7 @@ // Test if we have a GNU build ID in our main executable and in our debug file, // and we have a .dwp file that doesn't, that we can still load our .dwp file. -// RUN: %clang -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -c %s -o %t.o +// RUN: %clangxx -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -c %s -o %t.o // RUN: ld.lld %t.o --build-id=md5 -o %t // RUN: llvm-dwp %t.dwo -o %t.dwp // RUN: rm %t.dwo diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-function.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-function.cpp index c42f9fe0b8b5..a00b2bd9506e 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-function.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-function.cpp @@ -1,6 +1,6 @@ // REQUIRES: lld -// RUN: %clang %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames +// RUN: %clangxx %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames // RUN: ld.lld %t.o -o %t // RUN: lldb-test symbols --name=foo --find=function --function-flags=base %t | \ // RUN: FileCheck --check-prefix=BASE %s @@ -19,7 +19,7 @@ // RUN: lldb-test symbols --name=not_there --find=function %t | \ // RUN: FileCheck --check-prefix=EMPTY %s // -// RUN: %clang %s -g -c -o %t --target=x86_64-apple-macosx +// RUN: %clangxx %s -g -c -o %t --target=x86_64-apple-macosx // RUN: lldb-test symbols --name=foo --find=function --function-flags=base %t | \ // RUN: FileCheck --check-prefix=BASE %s // RUN: lldb-test symbols --name=foo --find=function --function-flags=method %t | \ @@ -39,7 +39,7 @@ // RUN: lldb-test symbols --name=not_there --find=function %t | \ // RUN: FileCheck --check-prefix=EMPTY %s -// RUN: %clang %s -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames +// RUN: %clangxx %s -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames // RUN: ld.lld %t.o -o %t // RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix NAMES // RUN: lldb-test symbols --name=foo --find=function --function-flags=base %t | \ diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-namespace.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-namespace.cpp index 13d50af7ef60..14c73c3e82ef 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-namespace.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-namespace.cpp @@ -1,6 +1,6 @@ // REQUIRES: lld -// RUN: %clang %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames +// RUN: %clangxx %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames // RUN: ld.lld %t.o -o %t // RUN: lldb-test symbols --name=foo --find=namespace %t | \ // RUN: FileCheck --check-prefix=FOO %s @@ -9,7 +9,7 @@ // RUN: lldb-test symbols --name=not_there --find=namespace %t | \ // RUN: FileCheck --check-prefix=EMPTY %s // -// RUN: %clang %s -g -c -o %t --target=x86_64-apple-macosx +// RUN: %clangxx %s -g -c -o %t --target=x86_64-apple-macosx // RUN: lldb-test symbols --name=foo --find=namespace %t | \ // RUN: FileCheck --check-prefix=FOO %s // RUN: lldb-test symbols --name=foo --find=namespace --context=context %t | \ @@ -17,7 +17,7 @@ // RUN: lldb-test symbols --name=not_there --find=namespace %t | \ // RUN: FileCheck --check-prefix=EMPTY %s -// RUN: %clang %s -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames +// RUN: %clangxx %s -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames // RUN: ld.lld %t.o -o %t // RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix NAMES // RUN: lldb-test symbols --name=foo --find=namespace %t | \ diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-type.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-type.cpp index af4920660872..315fab344dfe 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-type.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-type.cpp @@ -1,6 +1,6 @@ // REQUIRES: lld -// RUN: %clang %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames +// RUN: %clangxx %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames // RUN: ld.lld %t.o -o %t // RUN: lldb-test symbols --name=foo --find=type %t | \ // RUN: FileCheck --check-prefix=NAME %s @@ -11,7 +11,7 @@ // RUN: lldb-test symbols --name=not_there --find=type %t | \ // RUN: FileCheck --check-prefix=EMPTY %s // -// RUN: %clang %s -g -c -o %t --target=x86_64-apple-macosx +// RUN: %clangxx %s -g -c -o %t --target=x86_64-apple-macosx // RUN: lldb-test symbols --name=foo --find=type %t | \ // RUN: FileCheck --check-prefix=NAME %s // RUN: lldb-test symbols --name=::foo --find=type %t | \ @@ -21,7 +21,7 @@ // RUN: lldb-test symbols --name=not_there --find=type %t | \ // RUN: FileCheck --check-prefix=EMPTY %s -// RUN: %clang %s -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames +// RUN: %clangxx %s -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames // RUN: ld.lld %t.o -o %t // RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix NAMES // RUN: lldb-test symbols --name=foo --find=type %t | \ diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-variable.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-variable.cpp index e46fa14489d3..b6e2252c2840 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-variable.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-variable.cpp @@ -1,6 +1,6 @@ // REQUIRES: lld -// RUN: %clang %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames +// RUN: %clangxx %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames // RUN: ld.lld %t.o -o %t // RUN: lldb-test symbols --name=foo --find=variable --context=context %t | \ // RUN: FileCheck --check-prefix=CONTEXT %s @@ -11,7 +11,7 @@ // RUN: lldb-test symbols --name=not_there --find=variable %t | \ // RUN: FileCheck --check-prefix=EMPTY %s // -// RUN: %clang %s -g -c -o %t --target=x86_64-apple-macosx +// RUN: %clangxx %s -g -c -o %t --target=x86_64-apple-macosx // RUN: lldb-test symbols --name=foo --find=variable --context=context %t | \ // RUN: FileCheck --check-prefix=CONTEXT %s // RUN: lldb-test symbols --name=foo --find=variable %t | \ @@ -21,7 +21,7 @@ // RUN: lldb-test symbols --name=not_there --find=variable %t | \ // RUN: FileCheck --check-prefix=EMPTY %s // -// RUN: %clang %s -g -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames +// RUN: %clangxx %s -g -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames // RUN: ld.lld %t.o -o %t // RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix NAMES // RUN: lldb-test symbols --name=foo --find=variable --context=context %t | \ diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/find-function-regex.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/find-function-regex.cpp index be267596fb37..5c7ad844f660 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/find-function-regex.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/find-function-regex.cpp @@ -1,13 +1,13 @@ // REQUIRES: lld -// RUN: %clang %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames +// RUN: %clangxx %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames // RUN: ld.lld %t.o -o %t // RUN: lldb-test symbols --name=f.o --regex --find=function %t | FileCheck %s // -// RUN: %clang %s -g -c -o %t --target=x86_64-apple-macosx +// RUN: %clangxx %s -g -c -o %t --target=x86_64-apple-macosx // RUN: lldb-test symbols --name=f.o --regex --find=function %t | FileCheck %s -// RUN: %clang %s -g -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames +// RUN: %clangxx %s -g -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames // RUN: ld.lld %t.o -o %t // RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix NAMES // RUN: lldb-test symbols --name=f.o --regex --find=function %t | FileCheck %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/find-method-local-struct.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/find-method-local-struct.cpp index 3da4a4a23f8a..46553a83081e 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/find-method-local-struct.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/find-method-local-struct.cpp @@ -1,4 +1,4 @@ -// RUN: %clang %s -g -c -o %t --target=x86_64-apple-macosx +// RUN: %clangxx %s -g -c -o %t --target=x86_64-apple-macosx // RUN: lldb-test symbols --name=foo --find=function --function-flags=method %t | \ // RUN: FileCheck %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/find-method.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/find-method.cpp index 9f8b3df2f31a..26faf8907b4a 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/find-method.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/find-method.cpp @@ -1,15 +1,15 @@ // REQUIRES: lld -// RUN: %clang %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames +// RUN: %clangxx %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames // RUN: ld.lld %t.o -o %t // RUN: lldb-test symbols --name=foo --find=function --function-flags=method %t | \ // RUN: FileCheck %s // -// RUN: %clang %s -g -c -o %t --target=x86_64-apple-macosx +// RUN: %clangxx %s -g -c -o %t --target=x86_64-apple-macosx // RUN: lldb-test symbols --name=foo --find=function --function-flags=method %t | \ // RUN: FileCheck %s -// RUN: %clang %s -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames +// RUN: %clangxx %s -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames // RUN: ld.lld %t.o -o %t // RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix NAMES // RUN: lldb-test symbols --name=foo --find=function --function-flags=method %t | \ diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/find-qualified-variable.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/find-qualified-variable.cpp index 1ad3e7fbadf5..e3f9ce308b75 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/find-qualified-variable.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/find-qualified-variable.cpp @@ -1,4 +1,4 @@ -// RUN: %clang %s -g -c -o %t --target=x86_64-apple-macosx +// RUN: %clangxx %s -g -c -o %t --target=x86_64-apple-macosx // RUN: lldb-test symbols --name=A::foo --find=variable %t | FileCheck %s // CHECK: Found 1 variables: diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/find-variable-dwo.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/find-variable-dwo.cpp index b5d35e4f7883..250b34377acd 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/find-variable-dwo.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/find-variable-dwo.cpp @@ -1,9 +1,9 @@ // REQUIRES: lld -// RUN: %clang %s -gdwarf-5 -gpubnames -gsplit-dwarf -c -emit-llvm -o - --target=x86_64-pc-linux -DONE | \ +// RUN: %clangxx %s -gdwarf-5 -gpubnames -gsplit-dwarf -c -emit-llvm -o - --target=x86_64-pc-linux -DONE | \ // RUN: llc -filetype=obj -split-dwarf-file=%t-1.dwo -o %t-1.o // RUN: llvm-objcopy --split-dwo=%t-1.dwo %t-1.o -// RUN: %clang %s -gdwarf-5 -gpubnames -gsplit-dwarf -c -emit-llvm -o - --target=x86_64-pc-linux -DTWO | \ +// RUN: %clangxx %s -gdwarf-5 -gpubnames -gsplit-dwarf -c -emit-llvm -o - --target=x86_64-pc-linux -DTWO | \ // RUN: llc -filetype=obj -split-dwarf-file=%t-2.dwo -o %t-2.o // RUN: llvm-objcopy --split-dwo=%t-2.dwo %t-2.o // RUN: ld.lld %t-1.o %t-2.o -o %t diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/find-variable-file.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/find-variable-file.cpp index f1a9a4eb12d0..3a8cf89ac367 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/find-variable-file.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/find-variable-file.cpp @@ -1,7 +1,7 @@ // REQUIRES: lld -// RUN: %clang -g -c -o %t-1.o --target=x86_64-pc-linux -gno-pubnames %s -// RUN: %clang -g -c -o %t-2.o --target=x86_64-pc-linux -gno-pubnames %S/Inputs/find-variable-file-2.cpp +// RUN: %clangxx -g -c -o %t-1.o --target=x86_64-pc-linux -gno-pubnames %s +// RUN: %clangxx -g -c -o %t-2.o --target=x86_64-pc-linux -gno-pubnames %S/Inputs/find-variable-file-2.cpp // RUN: ld.lld %t-1.o %t-2.o -o %t // RUN: lldb-test symbols --file=find-variable-file.cpp --find=variable %t | \ // RUN: FileCheck --check-prefix=ONE %s @@ -10,16 +10,16 @@ // Run the same test with split-dwarf. This is interesting because the two // split compile units will have the same offset (0). -// RUN: %clang -g -c -o %t-1.o --target=x86_64-pc-linux -gsplit-dwarf %s -// RUN: %clang -g -c -o %t-2.o --target=x86_64-pc-linux -gsplit-dwarf %S/Inputs/find-variable-file-2.cpp +// RUN: %clangxx -g -c -o %t-1.o --target=x86_64-pc-linux -gsplit-dwarf %s +// RUN: %clangxx -g -c -o %t-2.o --target=x86_64-pc-linux -gsplit-dwarf %S/Inputs/find-variable-file-2.cpp // RUN: ld.lld %t-1.o %t-2.o -o %t // RUN: lldb-test symbols --file=find-variable-file.cpp --find=variable %t | \ // RUN: FileCheck --check-prefix=ONE %s // RUN: lldb-test symbols --file=find-variable-file-2.cpp --find=variable %t | \ // RUN: FileCheck --check-prefix=TWO %s -// RUN: %clang -c -o %t-1.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames %s -// RUN: %clang -c -o %t-2.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames %S/Inputs/find-variable-file-2.cpp +// RUN: %clangxx -c -o %t-1.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames %s +// RUN: %clangxx -c -o %t-2.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames %S/Inputs/find-variable-file-2.cpp // RUN: ld.lld %t-1.o %t-2.o -o %t // RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix NAMES // RUN: lldb-test symbols --file=find-variable-file.cpp --find=variable %t | \ @@ -29,9 +29,9 @@ // Run the same test with split dwarf and pubnames to check whether we can find // the compile unit using the name index if it is split. -// RUN: %clang -c -o %t-1.o --target=x86_64-pc-linux -gdwarf-5 -gsplit-dwarf -gpubnames %s -// RUN: %clang -c -o %t-2.o --target=x86_64-pc-linux -gdwarf-5 -gsplit-dwarf -gpubnames %S/Inputs/find-variable-file-2.cpp -// RUN: %clang -c -o %t-3.o --target=x86_64-pc-linux -gdwarf-5 -gsplit-dwarf -gpubnames %S/Inputs/find-variable-file-3.cpp +// RUN: %clangxx -c -o %t-1.o --target=x86_64-pc-linux -gdwarf-5 -gsplit-dwarf -gpubnames %s +// RUN: %clangxx -c -o %t-2.o --target=x86_64-pc-linux -gdwarf-5 -gsplit-dwarf -gpubnames %S/Inputs/find-variable-file-2.cpp +// RUN: %clangxx -c -o %t-3.o --target=x86_64-pc-linux -gdwarf-5 -gsplit-dwarf -gpubnames %S/Inputs/find-variable-file-3.cpp // RUN: ld.lld %t-1.o %t-2.o %t-3.o -o %t // RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix NAMES // RUN: lldb-test symbols --file=find-variable-file.cpp --find=variable %t | \ diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/member-pointers.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/member-pointers.cpp index a12892305798..00805770af11 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/member-pointers.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/member-pointers.cpp @@ -1,7 +1,7 @@ // REQUIRES: lld
// Itanium ABI:
-// RUN: %clang --target=x86_64-pc-linux -gdwarf -c -o %t_linux.o %s
+// RUN: %clangxx --target=x86_64-pc-linux -gdwarf -c -o %t_linux.o %s
// RUN: %lldb -f %t_linux.o -b -o "target variable s1 s2 m1 m2 v1 v2 v3 v4" | FileCheck --check-prefix=CHECK-GNU %s
//
// CHECK-GNU: (void (Single1::*)()) s1 = 0x00000000000000000000000000000000
diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/module-ownership.mm b/lldb/test/Shell/SymbolFile/DWARF/x86/module-ownership.mm index 2dec109a781c..27aa1365ab54 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/module-ownership.mm +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/module-ownership.mm @@ -1,5 +1,5 @@ // RUN: rm -rf %t.cache -// RUN: %clang --target=x86_64-apple-macosx -g -gmodules -Wno-objc-root-class \ +// RUN: %clangxx --target=x86_64-apple-macosx -g -gmodules -Wno-objc-root-class \ // RUN: -fmodules -fmodules-cache-path=%t.cache \ // RUN: -c -o %t.o %s -I%S/Inputs // RUN: lldb-test symbols -dump-clang-ast %t.o | FileCheck --check-prefix CHECK-ANON-S1 %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/no_unique_address-with-bitfields.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/no_unique_address-with-bitfields.cpp index 297fb82caee5..8f530c803a40 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/no_unique_address-with-bitfields.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/no_unique_address-with-bitfields.cpp @@ -1,4 +1,4 @@ -// RUN: %clang --target=x86_64-apple-macosx -c -gdwarf -o %t %s +// RUN: %clangxx --target=x86_64-apple-macosx -c -gdwarf -o %t %s // RUN: %lldb %t \ // RUN: -o "target var global" \ // RUN: -o "target var global2" \ diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/type-definition-search.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/type-definition-search.cpp index 5a40a6e0fbc2..5ab45eefd221 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/type-definition-search.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/type-definition-search.cpp @@ -4,18 +4,18 @@ // REQUIRES: lld -// RUN: %clang --target=x86_64-pc-linux -c %s -o %t-n-a.o -g -gsimple-template-names -DFILE_A -// RUN: %clang --target=x86_64-pc-linux -c %s -o %t-n-b.o -g -gsimple-template-names -DFILE_B +// RUN: %clangxx --target=x86_64-pc-linux -c %s -o %t-n-a.o -g -gsimple-template-names -DFILE_A +// RUN: %clangxx --target=x86_64-pc-linux -c %s -o %t-n-b.o -g -gsimple-template-names -DFILE_B // RUN: ld.lld %t-n-a.o %t-n-b.o -o %t-n // RUN: %lldb %t-n -o "target variable --ptr-depth 1 --show-types both_a both_b" -o exit | FileCheck %s -// RUN: %clang --target=x86_64-pc-linux -c %s -o %t-t-a.o -g -fdebug-types-section -DFILE_A -// RUN: %clang --target=x86_64-pc-linux -c %s -o %t-t-b.o -g -fdebug-types-section -DFILE_B +// RUN: %clangxx --target=x86_64-pc-linux -c %s -o %t-t-a.o -g -fdebug-types-section -DFILE_A +// RUN: %clangxx --target=x86_64-pc-linux -c %s -o %t-t-b.o -g -fdebug-types-section -DFILE_B // RUN: ld.lld %t-t-a.o %t-t-b.o -o %t-t // RUN: %lldb %t-t -o "target variable --ptr-depth 1 --show-types both_a both_b" -o exit | FileCheck %s -// RUN: %clang --target=x86_64-pc-linux -c %s -o %t-tn-a.o -g -fdebug-types-section -gsimple-template-names -DFILE_A -// RUN: %clang --target=x86_64-pc-linux -c %s -o %t-tn-b.o -g -fdebug-types-section -gsimple-template-names -DFILE_B +// RUN: %clangxx --target=x86_64-pc-linux -c %s -o %t-tn-a.o -g -fdebug-types-section -gsimple-template-names -DFILE_A +// RUN: %clangxx --target=x86_64-pc-linux -c %s -o %t-tn-b.o -g -fdebug-types-section -gsimple-template-names -DFILE_B // RUN: ld.lld %t-tn-a.o %t-tn-b.o -o %t-tn // RUN: %lldb %t-tn -o "target variable --ptr-depth 1 --show-types both_a both_b" -o exit | FileCheck %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/type-unit-same-basename.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/type-unit-same-basename.cpp index f7f5a30aaba9..f9fd5b5e5225 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/type-unit-same-basename.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/type-unit-same-basename.cpp @@ -5,8 +5,8 @@ // REQUIRES: lld -// RUN: %clang --target=x86_64-pc-linux -c %s -o %t-a.o -g -fdebug-types-section -flimit-debug-info -DFILE_A -// RUN: %clang --target=x86_64-pc-linux -c %s -o %t-b.o -g -fdebug-types-section -flimit-debug-info -DFILE_B +// RUN: %clangxx --target=x86_64-pc-linux -c %s -o %t-a.o -g -fdebug-types-section -flimit-debug-info -DFILE_A +// RUN: %clangxx --target=x86_64-pc-linux -c %s -o %t-b.o -g -fdebug-types-section -flimit-debug-info -DFILE_B // RUN: ld.lld -z undefs %t-a.o %t-b.o -o %t // RUN: %lldb %t -o "target variable x" -o exit | FileCheck %s diff --git a/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp b/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp index 1abce6999874..064ed6d1d3e5 100644 --- a/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp +++ b/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp @@ -1651,3 +1651,93 @@ DWARF: EXPECT_EQ(param_die, ast_parser.GetObjectParameter(sub2, context_die)); } } + +TEST_F(DWARFASTParserClangTests, TestTypeBitSize) { + // Tests that we correctly parse DW_AT_bit_size of a DW_AT_base_type. + + const char *yamldata = R"( +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_AARCH64 +DWARF: + debug_str: + - _BitInt(2) + debug_abbrev: + - ID: 0 + Table: + - Code: 0x1 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_language + Form: DW_FORM_data2 + - Code: 0x2 + Tag: DW_TAG_base_type + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_encoding + Form: DW_FORM_data1 + - Attribute: DW_AT_byte_size + Form: DW_FORM_data1 + - Attribute: DW_AT_bit_size + Form: DW_FORM_data1 + + debug_info: + - Version: 5 + UnitType: DW_UT_compile + AddrSize: 8 + Entries: + +# DW_TAG_compile_unit +# DW_AT_language [DW_FORM_data2] (DW_LANG_C_plus_plus) + + - AbbrCode: 0x1 + Values: + - Value: 0x04 + +# DW_TAG_base_type +# DW_AT_name [DW_FORM_strp] ('_BitInt(2)') + + - AbbrCode: 0x2 + Values: + - Value: 0x0 + - Value: 0x05 + - Value: 0x01 + - Value: 0x02 +... +)"; + + YAMLModuleTester t(yamldata); + + DWARFUnit *unit = t.GetDwarfUnit(); + ASSERT_NE(unit, nullptr); + const DWARFDebugInfoEntry *cu_entry = unit->DIE().GetDIE(); + ASSERT_EQ(cu_entry->Tag(), DW_TAG_compile_unit); + ASSERT_EQ(unit->GetDWARFLanguageType(), DW_LANG_C_plus_plus); + DWARFDIE cu_die(unit, cu_entry); + + auto holder = std::make_unique<clang_utils::TypeSystemClangHolder>("ast"); + auto &ast_ctx = *holder->GetAST(); + DWARFASTParserClangStub ast_parser(ast_ctx); + + auto type_die = cu_die.GetFirstChild(); + ASSERT_TRUE(type_die.IsValid()); + ASSERT_EQ(type_die.Tag(), DW_TAG_base_type); + + ParsedDWARFTypeAttributes attrs(type_die); + EXPECT_EQ(attrs.byte_size.value_or(0), 1U); + EXPECT_EQ(attrs.data_bit_size.value_or(0), 2U); + + SymbolContext sc; + auto type_sp = + ast_parser.ParseTypeFromDWARF(sc, type_die, /*type_is_new_ptr=*/nullptr); + ASSERT_NE(type_sp, nullptr); + + EXPECT_EQ(llvm::expectedToOptional(type_sp->GetByteSize(nullptr)).value_or(0), + 1U); +} diff --git a/llvm/Maintainers.md b/llvm/Maintainers.md index e52259236fc1..1eba955f9d6e 100644 --- a/llvm/Maintainers.md +++ b/llvm/Maintainers.md @@ -197,7 +197,7 @@ david.green@arm.com (email), [davemgreen](https://github.com/davemgreen) (GitHub Amara Emerson (esp. AArch64 GlobalISel) \ amara@apple.com (email), [aemerson](https://github.com/aemerson) (GitHub) \ Eli Friedman (esp. ARM64EC) \ -efriedma@quicinc.com (email), [efriedma-quic](https://github.com/efriedma-quic) (GitHub) \ +efriedma@qti.qualcomm.com (email), [efriedma-quic](https://github.com/efriedma-quic) (GitHub) \ Sjoerd Meijer \ smeijer@nvidia.com (email), [sjoerdmeijer](https://github.com/sjoerdmeijer) (GitHub) \ Nashe Mncube \ @@ -246,7 +246,7 @@ mail@justinbogner.com (email), [bogner](https://github.com/bogner) (GitHub) #### Hexagon backend Sundeep Kushwaha \ -sundeepk@quicinc.com (email), [SundeepKushwaha](https://github.com/SundeepKushwaha) (GitHub) +sundeepk@qti.qualcomm.com (email), [SundeepKushwaha](https://github.com/SundeepKushwaha) (GitHub) #### Lanai backend diff --git a/llvm/docs/GettingInvolved.rst b/llvm/docs/GettingInvolved.rst index 4b4b09ad87ab..039d61624093 100644 --- a/llvm/docs/GettingInvolved.rst +++ b/llvm/docs/GettingInvolved.rst @@ -223,6 +223,10 @@ what to add to your calendar invite. - `ics <https://calendar.google.com/calendar/ical/c_673c6cd64474c0aff173bf8fa609559f93d654e0984d9d91d71abd32d28c0486%40group.calendar.google.com/public/basic.ics>`__ `gcal <https://calendar.google.com/calendar/embed?src=c_673c6cd64474c0aff173bf8fa609559f93d654e0984d9d91d71abd32d28c0486%40group.calendar.google.com&ctz=America%2FLos_Angeles>`__ - + * - GlobalISel + - Every 2nd Tuesday of the month + - `gcal <https://calendar.google.com/calendar/u/0?cid=ZDcyMjc0ZjZiZjNhMzFlYmE3NTNkMWM2MGM2NjM5ZWU3ZDE2MjM4MGFlZDc2ZjViY2UyYzMwNzVhZjk4MzQ4ZEBncm91cC5jYWxlbmRhci5nb29nbGUuY29t>`__ + - `Meeting details/agenda <https://docs.google.com/document/d/1Ry8O4-Tm5BFj9AMjr8qTQFU80z-ptiNQ62687NaIvLs/edit?usp=sharing>`__ For event owners, our Discord bot also supports sending automated announcements @@ -254,10 +258,6 @@ the future. - `ics <https://calendar.google.com/calendar/ical/c_1mincouiltpa24ac14of14lhi4%40group.calendar.google.com/public/basic.ics>`__ `gcal <https://calendar.google.com/calendar/embed?src=c_1mincouiltpa24ac14of14lhi4%40group.calendar.google.com>`__ - `Minutes/docs <https://docs.google.com/document/d/1-uEEZfmRdPThZlctOq9eXlmUaSSAAi8oKxhrPY_lpjk/edit#>`__ - * - GlobalISel - - Every 2nd Tuesday of the month - - `gcal <https://calendar.google.com/calendar/u/0?cid=ZDcyMjc0ZjZiZjNhMzFlYmE3NTNkMWM2MGM2NjM5ZWU3ZDE2MjM4MGFlZDc2ZjViY2UyYzMwNzVhZjk4MzQ4ZEBncm91cC5jYWxlbmRhci5nb29nbGUuY29t>`__ - - `Meeting details/agenda <https://docs.google.com/document/d/1Ry8O4-Tm5BFj9AMjr8qTQFU80z-ptiNQ62687NaIvLs/edit?usp=sharing>`__ * - Vector Predication - Every 2 weeks on Tuesdays, 3pm UTC - diff --git a/llvm/docs/HowToCrossCompileBuiltinsOnArm.rst b/llvm/docs/HowToCrossCompileBuiltinsOnArm.rst index d7759ad8edd0..58599404d5cd 100644 --- a/llvm/docs/HowToCrossCompileBuiltinsOnArm.rst +++ b/llvm/docs/HowToCrossCompileBuiltinsOnArm.rst @@ -8,18 +8,18 @@ Introduction This document contains information about building and testing the builtins part of compiler-rt for an Arm target, from an x86_64 Linux machine. -While this document concentrates on Arm and Linux the general principles should +While this document concentrates on Arm and Linux, the general principles should apply to other targets supported by compiler-rt. Further contributions for other targets are welcome. The instructions in this document depend on libraries and programs external to -LLVM, there are many ways to install and configure these dependencies so you +LLVM. There are many ways to install and configure these dependencies, so you may need to adapt the instructions here to fit your own situation. Prerequisites ============= -In this use case we will be using cmake on a Debian-based Linux system, +In this use case, we will be using cmake on a Debian-based Linux system, cross-compiling from an x86_64 host to a hard-float Armv7-A target. We will be using as many of the LLVM tools as we can, but it is possible to use GNU equivalents. @@ -35,7 +35,7 @@ You will need: An existing sysroot is required because some of the builtins include C library headers and a sysroot is the easiest way to get those. -In this example we will be using ``ninja`` as the build tool. +In this example, we will be using ``ninja`` as the build tool. See https://compiler-rt.llvm.org/ for information about the dependencies on clang and LLVM. @@ -46,7 +46,7 @@ the source for LLVM and compiler-rt. ``qemu-arm`` should be available as a package for your Linux distribution. The most complicated of the prerequisites to satisfy is the ``arm-linux-gnueabihf`` -sysroot. In theory it is possible to use the Linux distributions multiarch +sysroot. In theory, it is possible to use the Linux distributions multiarch support to fulfill the dependencies for building but unfortunately due to ``/usr/local/include`` being added some host includes are selected. @@ -153,7 +153,7 @@ The cmake try compile stage fails At an early stage cmake will attempt to compile and link a simple C program to test if the toolchain is working. -This stage can often fail at link time if the ``--sysroot=``, ``--target`` or +This stage can often fail at link time if the ``--sysroot=``, ``--target``, or ``--gcc-toolchain=`` options are not passed to the compiler. Check the ``CMAKE_<LANGUAGE>_FLAGS`` and ``CMAKE_<LANGAUGE>_COMPILER_TARGET`` flags along with any of the specific CMake sysroot and toolchain options. @@ -165,7 +165,7 @@ to make sure it is working. For example:: Clang uses the host header files -------------------------------- -On debian based systems it is possible to install multiarch support for +On Debian-based systems, it is possible to install multiarch support for ``arm-linux-gnueabi`` and ``arm-linux-gnueabihf``. In many cases clang can successfully use this multiarch support when ``--gcc-toolchain=`` and ``--sysroot=`` are not supplied. Unfortunately clang adds ``/usr/local/include`` before @@ -177,8 +177,8 @@ use a separate ``arm-linux-gnueabihf`` toolchain. No target passed to clang ------------------------- -If clang is not given a target it will typically use the host target, this will -not understand the Arm assembly language files resulting in error messages such +If clang is not given a target, it will typically use the host target. This will +not understand the Arm assembly language files, resulting in error messages such as ``error: unknown directive .syntax unified``. You can check the clang invocation in the error message to see if there is no @@ -217,7 +217,7 @@ target to use is: * ``-DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabi`` -Depending on whether you want to use floating point instructions or not you +Depending on whether you want to use floating point instructions or not, you may need extra c-flags such as ``-mfloat-abi=softfp`` for use of floating-point instructions, and ``-mfloat-abi=soft -mfpu=none`` for software floating-point emulation. @@ -241,7 +241,7 @@ To build and test the libraries using a similar method to Armv7-A is possible but more difficult. The main problems are: * There is not a ``qemu-arm`` user-mode emulator for bare-metal systems. - ``qemu-system-arm`` can be used but this is significantly more difficult + ``qemu-system-arm`` can be used, but this is significantly more difficult to setup. This document does not explain how to do this. * The targets to compile compiler-rt have the suffix ``-none-eabi``. This uses the BareMetal driver in clang and by default will not find the libraries @@ -252,8 +252,8 @@ that are supported on Armv7-A we can still get most of the value of running the tests using the same ``qemu-arm`` that we used for Armv7-A by building and running the test cases for Armv7-A but using the builtins compiled for Armv6-M, Armv7-M or Armv7E-M. This will test that the builtins can be linked -into a binary and execute the tests correctly but it will not catch if the -builtins use instructions that are supported on Armv7-A but not Armv6-M, +into a binary and execute the tests correctly, but it will not catch if the +builtins use instructions that are supported on Armv7-A but not on Armv6-M, Armv7-M and Armv7E-M. This requires a second ``arm-none-eabi`` toolchain for building the builtins. @@ -321,9 +321,9 @@ command for Armv7-A build and test:: The Armv6-M builtins will use the soft-float ABI. When compiling the tests for Armv7-A we must include ``"-mthumb -mfloat-abi=soft -mfpu=none"`` in the -test-c-flags. We must use an Armv7-A soft-float abi sysroot for ``qemu-arm``. +test-c-flags. We must use an Armv7-A soft-float ABI sysroot for ``qemu-arm``. -Depending on the linker used for the test cases you may encounter BuildAttribute +Depending on the linker used for the test cases, you may encounter BuildAttribute mismatches between the M-profile objects from compiler-rt and the A-profile objects from the test. The lld linker does not check the profile BuildAttribute so it can be used to link the tests by adding ``-fuse-ld=lld`` to the diff --git a/llvm/include/llvm/ADT/TypeSwitch.h b/llvm/include/llvm/ADT/TypeSwitch.h index 5657303b0a1f..50ca1d5a6b5b 100644 --- a/llvm/include/llvm/ADT/TypeSwitch.h +++ b/llvm/include/llvm/ADT/TypeSwitch.h @@ -111,6 +111,7 @@ public: return std::move(*result); return defaultFn(this->value); } + /// As a default, return the given value. [[nodiscard]] ResultT Default(ResultT defaultResult) { if (result) @@ -118,6 +119,22 @@ public: return defaultResult; } + /// Default for pointer-like results types that accept `nullptr`. + template <typename ArgT = ResultT, + typename = + std::enable_if_t<std::is_constructible_v<ArgT, std::nullptr_t>>> + [[nodiscard]] ResultT Default(std::nullptr_t) { + return Default(ResultT(nullptr)); + } + + /// Default for optional results types that accept `std::nullopt`. + template <typename ArgT = ResultT, + typename = + std::enable_if_t<std::is_constructible_v<ArgT, std::nullopt_t>>> + [[nodiscard]] ResultT Default(std::nullopt_t) { + return Default(ResultT(std::nullopt)); + } + /// Declare default as unreachable, making sure that all cases were handled. [[nodiscard]] ResultT DefaultUnreachable( const char *message = "Fell off the end of a type-switch") { diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 48650a6df22f..823753021ff7 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -54,6 +54,10 @@ struct FunctionPathAndClusterInfo { DenseMap<UniqueBBID, uint64_t> NodeCounts; // Edge counts for each edge, stored as a nested map. DenseMap<UniqueBBID, DenseMap<UniqueBBID, uint64_t>> EdgeCounts; + // Hash for each basic block. The Hashes are stored for every original block + // (not cloned blocks), hence the map key being unsigned instead of + // UniqueBBID. + DenseMap<unsigned, uint64_t> BBHashes; }; class BasicBlockSectionsProfileReader { diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td index 7be1b654ca72..24c1b035d0dd 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.td +++ b/llvm/include/llvm/IR/RuntimeLibcalls.td @@ -1585,7 +1585,7 @@ def __aeabi_f2ulz : RuntimeLibcallImpl<FPTOUINT_F32_I64>; // CallingConv::ARM_AA // RTABI chapter 4.1.2, Table 7 def __aeabi_d2f : RuntimeLibcallImpl<FPROUND_F64_F32>; // CallingConv::ARM_AAPCS def __aeabi_d2h : RuntimeLibcallImpl<FPROUND_F64_F16>; // CallingConv::ARM_AAPCS -def __aeabi_f2d : RuntimeLibcallImpl<FPEXT_F32_F64>; // CallingConv::ARM_AAPCS +def __aeabi_f2d : RuntimeLibcallImpl<FPEXT_F32_F64>; // CallingConv::ARM_AAPCS // Integer to floating-point conversions. // RTABI chapter 4.1.2, Table 8 diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h index e9a417d3d4fb..467ab6fd3c1e 100644 --- a/llvm/include/llvm/Object/ELFTypes.h +++ b/llvm/include/llvm/Object/ELFTypes.h @@ -834,30 +834,32 @@ struct BBAddrMap { bool OmitBBEntries : 1; bool CallsiteEndOffsets : 1; bool BBHash : 1; + bool PostLinkCfg : 1; bool hasPGOAnalysis() const { return FuncEntryCount || BBFreq || BrProb; } bool hasPGOAnalysisBBData() const { return BBFreq || BrProb; } // Encodes to minimum bit width representation. - uint8_t encode() const { - return (static_cast<uint8_t>(FuncEntryCount) << 0) | - (static_cast<uint8_t>(BBFreq) << 1) | - (static_cast<uint8_t>(BrProb) << 2) | - (static_cast<uint8_t>(MultiBBRange) << 3) | - (static_cast<uint8_t>(OmitBBEntries) << 4) | - (static_cast<uint8_t>(CallsiteEndOffsets) << 5) | - (static_cast<uint8_t>(BBHash) << 6); + uint16_t encode() const { + return (static_cast<uint16_t>(FuncEntryCount) << 0) | + (static_cast<uint16_t>(BBFreq) << 1) | + (static_cast<uint16_t>(BrProb) << 2) | + (static_cast<uint16_t>(MultiBBRange) << 3) | + (static_cast<uint16_t>(OmitBBEntries) << 4) | + (static_cast<uint16_t>(CallsiteEndOffsets) << 5) | + (static_cast<uint16_t>(BBHash) << 6) | + (static_cast<uint16_t>(PostLinkCfg) << 7); } // Decodes from minimum bit width representation and validates no // unnecessary bits are used. - static Expected<Features> decode(uint8_t Val) { + static Expected<Features> decode(uint16_t Val) { Features Feat{ static_cast<bool>(Val & (1 << 0)), static_cast<bool>(Val & (1 << 1)), static_cast<bool>(Val & (1 << 2)), static_cast<bool>(Val & (1 << 3)), static_cast<bool>(Val & (1 << 4)), static_cast<bool>(Val & (1 << 5)), - static_cast<bool>(Val & (1 << 6))}; + static_cast<bool>(Val & (1 << 6)), static_cast<bool>(Val & (1 << 7))}; if (Feat.encode() != Val) return createStringError( std::error_code(), "invalid encoding for BBAddrMap::Features: 0x%x", @@ -867,10 +869,11 @@ struct BBAddrMap { bool operator==(const Features &Other) const { return std::tie(FuncEntryCount, BBFreq, BrProb, MultiBBRange, - OmitBBEntries, CallsiteEndOffsets, BBHash) == + OmitBBEntries, CallsiteEndOffsets, BBHash, PostLinkCfg) == std::tie(Other.FuncEntryCount, Other.BBFreq, Other.BrProb, Other.MultiBBRange, Other.OmitBBEntries, - Other.CallsiteEndOffsets, Other.BBHash); + Other.CallsiteEndOffsets, Other.BBHash, + Other.PostLinkCfg); } }; @@ -1010,23 +1013,30 @@ struct PGOAnalysisMap { /// probability associated with it. struct SuccessorEntry { /// Unique ID of this successor basic block. - uint32_t ID; + uint32_t ID = 0; /// Branch Probability of the edge to this successor taken from MBPI. BranchProbability Prob; + /// Raw edge count from the post link profile (e.g., from bolt or + /// propeller). + uint64_t PostLinkFreq = 0; bool operator==(const SuccessorEntry &Other) const { - return std::tie(ID, Prob) == std::tie(Other.ID, Other.Prob); + return std::tie(ID, Prob, PostLinkFreq) == + std::tie(Other.ID, Other.Prob, Other.PostLinkFreq); } }; /// Block frequency taken from MBFI BlockFrequency BlockFreq; + /// Raw block count taken from the post link profile (e.g., from bolt or + /// propeller). + uint64_t PostLinkBlockFreq = 0; /// List of successors of the current block llvm::SmallVector<SuccessorEntry, 2> Successors; bool operator==(const PGOBBEntry &Other) const { - return std::tie(BlockFreq, Successors) == - std::tie(Other.BlockFreq, Other.Successors); + return std::tie(BlockFreq, PostLinkBlockFreq, Successors) == + std::tie(Other.BlockFreq, PostLinkBlockFreq, Other.Successors); } }; diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h index a7c7c7c436dc..a8236ca37b5e 100644 --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -166,7 +166,7 @@ struct BBAddrMapEntry { std::optional<llvm::yaml::Hex64> Hash; }; uint8_t Version; - llvm::yaml::Hex8 Feature; + llvm::yaml::Hex16 Feature; struct BBRangeEntry { llvm::yaml::Hex64 BaseAddress; @@ -203,8 +203,10 @@ struct PGOAnalysisMapEntry { struct SuccessorEntry { uint32_t ID; llvm::yaml::Hex32 BrProb; + std::optional<uint32_t> PostLinkBrFreq; }; std::optional<uint64_t> BBFreq; + std::optional<uint32_t> PostLinkBBFreq; std::optional<std::vector<SuccessorEntry>> Successors; }; std::optional<uint64_t> FuncEntryCount; diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h index 898b4ea1f19a..4e2262fb15c5 100644 --- a/llvm/include/llvm/Support/LEB128.h +++ b/llvm/include/llvm/Support/LEB128.h @@ -29,8 +29,7 @@ inline unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, uint8_t Byte = Value & 0x7f; // NOTE: this assumes that this signed shift is an arithmetic right shift. Value >>= 7; - More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) || - ((Value == -1) && ((Byte & 0x40) != 0)))); + More = Value != ((Byte & 0x40) ? -1 : 0); Count++; if (More || Count < PadTo) Byte |= 0x80; // Mark this byte to show that more bytes will follow. @@ -58,8 +57,7 @@ inline unsigned encodeSLEB128(int64_t Value, uint8_t *p, unsigned PadTo = 0) { uint8_t Byte = Value & 0x7f; // NOTE: this assumes that this signed shift is an arithmetic right shift. Value >>= 7; - More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) || - ((Value == -1) && ((Byte & 0x40) != 0)))); + More = Value != ((Byte & 0x40) ? -1 : 0); Count++; if (More || Count < PadTo) Byte |= 0x80; // Mark this byte to show that more bytes will follow. diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h index ced446dacb6c..9dcd4b53a0db 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h +++ b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h @@ -26,8 +26,6 @@ namespace llvm { -LLVM_ABI extern cl::opt<bool> DebugInfoCorrelate; - class Function; class Instruction; class Module; diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index 84ee8c0bf3e1..11d829492a10 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -2854,14 +2854,18 @@ bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst, banerjeeMIVtest(Src, Dst, Loops, Result); } -// Given a product, e.g., 10*X*Y, returns the first constant operand, -// in this case 10. If there is no constant part, returns std::nullopt. -static std::optional<APInt> getConstantPart(const SCEV *Expr) { +/// Given a SCEVMulExpr, returns its first operand if its first operand is a +/// constant and the product doesn't overflow in a signed sense. Otherwise, +/// returns std::nullopt. For example, given (10 * X * Y)<nsw>, it returns 10. +/// Notably, if it doesn't have nsw, the multiplication may overflow, and if +/// so, it may not a multiple of 10. +static std::optional<APInt> getConstanCoefficient(const SCEV *Expr) { if (const auto *Constant = dyn_cast<SCEVConstant>(Expr)) return Constant->getAPInt(); if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr)) if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0))) - return Constant->getAPInt(); + if (Product->hasNoSignedWrap()) + return Constant->getAPInt(); return std::nullopt; } @@ -2887,7 +2891,7 @@ bool DependenceInfo::accumulateCoefficientsGCD(const SCEV *Expr, if (AddRec->getLoop() == CurLoop) { CurLoopCoeff = Step; } else { - std::optional<APInt> ConstCoeff = getConstantPart(Step); + std::optional<APInt> ConstCoeff = getConstanCoefficient(Step); // If the coefficient is the product of a constant and other stuff, we can // use the constant in the GCD computation. @@ -2940,7 +2944,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, const SCEV *Coeff = AddRec->getStepRecurrence(*SE); // If the coefficient is the product of a constant and other stuff, // we can use the constant in the GCD computation. - std::optional<APInt> ConstCoeff = getConstantPart(Coeff); + std::optional<APInt> ConstCoeff = getConstanCoefficient(Coeff); if (!ConstCoeff) return false; RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff->abs()); @@ -2958,7 +2962,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, const SCEV *Coeff = AddRec->getStepRecurrence(*SE); // If the coefficient is the product of a constant and other stuff, // we can use the constant in the GCD computation. - std::optional<APInt> ConstCoeff = getConstantPart(Coeff); + std::optional<APInt> ConstCoeff = getConstanCoefficient(Coeff); if (!ConstCoeff) return false; RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff->abs()); @@ -2979,7 +2983,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, } else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { // Search for constant operand to participate in GCD; // If none found; return false. - std::optional<APInt> ConstOp = getConstantPart(Product); + std::optional<APInt> ConstOp = getConstanCoefficient(Product); if (!ConstOp) return false; ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, ConstOp->abs()); @@ -3032,7 +3036,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); // If the coefficient is the product of a constant and other stuff, // we can use the constant in the GCD computation. - std::optional<APInt> ConstCoeff = getConstantPart(Delta); + std::optional<APInt> ConstCoeff = getConstanCoefficient(Delta); if (!ConstCoeff) // The difference of the two coefficients might not be a product // or constant, in which case we give up on this direction. diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 8aa488f0efd8..f65d88a669f1 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1443,7 +1443,7 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges, MF.hasBBSections() && NumMBBSectionRanges > 1, // Use static_cast to avoid breakage of tests on windows. static_cast<bool>(BBAddrMapSkipEmitBBEntries), HasCalls, - static_cast<bool>(EmitBBHash)}; + static_cast<bool>(EmitBBHash), false}; } void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index fbcd614b85d1..485b44ae4c4a 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -287,6 +287,25 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { } continue; } + case 'h': { // Basic block hash secifier. + // Skip the profile when the profile iterator (FI) refers to the + // past-the-end element. + if (FI == ProgramPathAndClusterInfo.end()) + continue; + for (auto BBIDHashStr : Values) { + auto [BBIDStr, HashStr] = BBIDHashStr.split(':'); + unsigned long long BBID = 0, Hash = 0; + if (getAsUnsignedInteger(BBIDStr, 10, BBID)) + return createProfileParseError(Twine("unsigned integer expected: '") + + BBIDStr + "'"); + if (getAsUnsignedInteger(HashStr, 16, Hash)) + return createProfileParseError( + Twine("unsigned integer expected in hex format: '") + HashStr + + "'"); + FI->second.BBHashes[BBID] = Hash; + } + continue; + } default: return createProfileParseError(Twine("invalid specifier: '") + Twine(Specifier) + "'"); diff --git a/llvm/lib/Frontend/Driver/CodeGenOptions.cpp b/llvm/lib/Frontend/Driver/CodeGenOptions.cpp index df884908845d..b546e816419e 100644 --- a/llvm/lib/Frontend/Driver/CodeGenOptions.cpp +++ b/llvm/lib/Frontend/Driver/CodeGenOptions.cpp @@ -12,7 +12,6 @@ #include "llvm/TargetParser/Triple.h" namespace llvm { -extern llvm::cl::opt<bool> DebugInfoCorrelate; extern llvm::cl::opt<llvm::InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate; } // namespace llvm @@ -64,8 +63,7 @@ TargetLibraryInfoImpl *createTLII(const llvm::Triple &TargetTriple, } std::string getDefaultProfileGenName() { - return llvm::DebugInfoCorrelate || - llvm::ProfileCorrelate != InstrProfCorrelator::NONE + return llvm::ProfileCorrelate != InstrProfCorrelator::NONE ? "default_%m.proflite" : "default_%m.profraw"; } diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index 6da97f9b3755..354c51d66419 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -831,17 +831,17 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF, }; uint8_t Version = 0; - uint8_t Feature = 0; + uint16_t Feature = 0; BBAddrMap::Features FeatEnable{}; while (!ULEBSizeErr && !MetadataDecodeErr && Cur && Cur.tell() < Content.size()) { Version = Data.getU8(Cur); if (!Cur) break; - if (Version < 2 || Version > 4) + if (Version < 2 || Version > 5) return createError("unsupported SHT_LLVM_BB_ADDR_MAP version: " + Twine(static_cast<int>(Version))); - Feature = Data.getU8(Cur); // Feature byte + Feature = Version < 5 ? Data.getU8(Cur) : Data.getU16(Cur); if (!Cur) break; auto FeatEnableOrErr = BBAddrMap::Features::decode(Feature); @@ -858,6 +858,11 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF, "basic block hash feature is enabled: version = " + Twine(static_cast<int>(Version)) + " feature = " + Twine(static_cast<int>(Feature))); + if (FeatEnable.PostLinkCfg && Version < 5) + return createError("version should be >= 5 for SHT_LLVM_BB_ADDR_MAP when " + "post link cfg feature is enabled: version = " + + Twine(static_cast<int>(Version)) + + " feature = " + Twine(static_cast<int>(Feature))); uint32_t NumBlocksInBBRange = 0; uint32_t NumBBRanges = 1; typename ELFFile<ELFT>::uintX_t RangeBaseAddress = 0; @@ -946,6 +951,10 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF, uint64_t BBF = FeatEnable.BBFreq ? readULEB128As<uint64_t>(Data, Cur, ULEBSizeErr) : 0; + uint32_t PostLinkBBFreq = + FeatEnable.PostLinkCfg + ? readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr) + : 0; // Branch probability llvm::SmallVector<PGOAnalysisMap::PGOBBEntry::SuccessorEntry, 2> @@ -955,13 +964,20 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF, for (uint64_t I = 0; I < SuccCount; ++I) { uint32_t BBID = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr); uint32_t BrProb = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr); + uint32_t PostLinkFreq = + FeatEnable.PostLinkCfg + ? readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr) + : 0; + if (PGOAnalyses) - Successors.push_back({BBID, BranchProbability::getRaw(BrProb)}); + Successors.push_back( + {BBID, BranchProbability::getRaw(BrProb), PostLinkFreq}); } } if (PGOAnalyses) - PGOBBEntries.push_back({BlockFrequency(BBF), std::move(Successors)}); + PGOBBEntries.push_back( + {BlockFrequency(BBF), PostLinkBBFreq, std::move(Successors)}); } if (PGOAnalyses) diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index 8b75fbe8291f..8530785d07c9 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -1465,13 +1465,19 @@ void ELFState<ELFT>::writeSectionContent( for (const auto &[Idx, E] : llvm::enumerate(*Section.Entries)) { // Write version and feature values. if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP) { - if (E.Version > 4) + if (E.Version > 5) WithColor::warning() << "unsupported SHT_LLVM_BB_ADDR_MAP version: " << static_cast<int>(E.Version) << "; encoding using the most recent version"; CBA.write(E.Version); - CBA.write(E.Feature); - SHeader.sh_size += 2; + SHeader.sh_size += 1; + if (E.Version < 5) { + CBA.write(static_cast<uint8_t>(E.Feature)); + SHeader.sh_size += 1; + } else { + CBA.write<uint16_t>(E.Feature, ELFT::Endianness); + SHeader.sh_size += 2; + } } auto FeatureOrErr = llvm::object::BBAddrMap::Features::decode(E.Feature); bool MultiBBRangeFeatureEnabled = false; @@ -1556,11 +1562,15 @@ void ELFState<ELFT>::writeSectionContent( for (const auto &PGOBBE : PGOBBEntries) { if (PGOBBE.BBFreq) SHeader.sh_size += CBA.writeULEB128(*PGOBBE.BBFreq); + if (FeatureOrErr->PostLinkCfg || PGOBBE.PostLinkBBFreq.has_value()) + SHeader.sh_size += CBA.writeULEB128(PGOBBE.PostLinkBBFreq.value_or(0)); if (PGOBBE.Successors) { SHeader.sh_size += CBA.writeULEB128(PGOBBE.Successors->size()); - for (const auto &[ID, BrProb] : *PGOBBE.Successors) { + for (const auto &[ID, BrProb, PostLinkBrFreq] : *PGOBBE.Successors) { SHeader.sh_size += CBA.writeULEB128(ID); SHeader.sh_size += CBA.writeULEB128(BrProb); + if (FeatureOrErr->PostLinkCfg || PostLinkBrFreq.has_value()) + SHeader.sh_size += CBA.writeULEB128(PostLinkBrFreq.value_or(0)); } } } diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index f8a84b075b77..e5e5fc20728e 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -1886,7 +1886,7 @@ void MappingTraits<ELFYAML::BBAddrMapEntry>::mapping( IO &IO, ELFYAML::BBAddrMapEntry &E) { assert(IO.getContext() && "The IO context is not initialized"); IO.mapRequired("Version", E.Version); - IO.mapOptional("Feature", E.Feature, Hex8(0)); + IO.mapOptional("Feature", E.Feature, Hex16(0)); IO.mapOptional("NumBBRanges", E.NumBBRanges); IO.mapOptional("BBRanges", E.BBRanges); } @@ -1920,6 +1920,7 @@ void MappingTraits<ELFYAML::PGOAnalysisMapEntry::PGOBBEntry>::mapping( IO &IO, ELFYAML::PGOAnalysisMapEntry::PGOBBEntry &E) { assert(IO.getContext() && "The IO context is not initialized"); IO.mapOptional("BBFreq", E.BBFreq); + IO.mapOptional("PostLinkBBFreq", E.PostLinkBBFreq); IO.mapOptional("Successors", E.Successors); } @@ -1929,6 +1930,7 @@ void MappingTraits<ELFYAML::PGOAnalysisMapEntry::PGOBBEntry::SuccessorEntry>:: assert(IO.getContext() && "The IO context is not initialized"); IO.mapRequired("ID", E.ID); IO.mapRequired("BrProb", E.BrProb); + IO.mapOptional("PostLinkBrFreq", E.PostLinkBrFreq); } void MappingTraits<ELFYAML::GnuHashHeader>::mapping(IO &IO, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 9ce12243016f..aed325cf627b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -221,12 +221,22 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const { const DebugLoc &DL = I.getDebugLoc(); MachineBasicBlock *BB = I.getParent(); + Register VCCReg = I.getOperand(1).getReg(); + MachineInstr *Cmp; + + if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + unsigned CmpOpc = + STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32; + Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc)).addReg(VCCReg).addImm(0); + } else { + // For gfx7 and earlier, S_CMP_LG_U64 doesn't exist, so we use S_OR_B64 + // which sets SCC as a side effect. + Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass); + Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst) + .addReg(VCCReg) + .addReg(VCCReg); + } - unsigned CmpOpc = - STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32; - MachineInstr *Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc)) - .addReg(I.getOperand(1).getReg()) - .addImm(0); if (!constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI)) return false; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index 540756653dd2..b84c30ecaac0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -500,6 +500,16 @@ void RegBankLegalizeHelper::lowerUnpackMinMax(MachineInstr &MI) { MI.eraseFromParent(); } +void RegBankLegalizeHelper::lowerUnpackAExt(MachineInstr &MI) { + auto [Op1Lo, Op1Hi] = unpackAExt(MI.getOperand(1).getReg()); + auto [Op2Lo, Op2Hi] = unpackAExt(MI.getOperand(2).getReg()); + auto ResLo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Op1Lo, Op2Lo}); + auto ResHi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Op1Hi, Op2Hi}); + B.buildBuildVectorTrunc(MI.getOperand(0).getReg(), + {ResLo.getReg(0), ResHi.getReg(0)}); + MI.eraseFromParent(); +} + static bool isSignedBFE(MachineInstr &MI) { if (GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI)) return (GI->is(Intrinsic::amdgcn_sbfe)); @@ -804,6 +814,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI, } break; } + case UnpackAExt: + return lowerUnpackAExt(MI); case WidenMMOToS32: return widenMMOToS32(cast<GAnyLoad>(MI)); } @@ -1120,7 +1132,8 @@ void RegBankLegalizeHelper::applyMappingDst( assert(RB == SgprRB); Register NewDst = MRI.createVirtualRegister(SgprRB_S32); Op.setReg(NewDst); - B.buildTrunc(Reg, NewDst); + if (!MRI.use_empty(Reg)) + B.buildTrunc(Reg, NewDst); break; } case InvalidMapping: { diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h index d937815bf471..ad3ff1d374ec 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h @@ -124,6 +124,7 @@ private: void lowerSplitTo32Select(MachineInstr &MI); void lowerSplitTo32SExtInReg(MachineInstr &MI); void lowerUnpackMinMax(MachineInstr &MI); + void lowerUnpackAExt(MachineInstr &MI); }; } // end namespace AMDGPU diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index a67b12a22589..01abd358ff59 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -470,7 +470,19 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}) .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}}) .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}) - .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}}); + .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}}) + .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackAExt}) + .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}}) + .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}}) + .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}}); + + addRulesForGOpcs({G_UADDO, G_USUBO}, Standard) + .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}}) + .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}}); + + addRulesForGOpcs({G_UADDE, G_USUBE}, Standard) + .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32, Sgpr32AExtBoolInReg}}) + .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}}); addRulesForGOpcs({G_MUL}, Standard).Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}}); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h index 93e0efda77fd..030bd75f8cd1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h @@ -223,7 +223,8 @@ enum LoweringMethodID { UniCstExt, SplitLoad, WidenLoad, - WidenMMOToS32 + WidenMMOToS32, + UnpackAExt }; enum FastRulesTypes { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index b34ab2a7e08e..8bb28084159e 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -7035,9 +7035,15 @@ static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N, SDLoc SL(N); if (Src.getOpcode() == ISD::SETCC) { + SDValue Op0 = Src.getOperand(0); + SDValue Op1 = Src.getOperand(1); + // Need to expand bfloat to float for comparison (setcc). + if (Op0.getValueType() == MVT::bf16) { + Op0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Op0); + Op1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Op1); + } // (ballot (ISD::SETCC ...)) -> (AMDGPUISD::SETCC ...) - return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src.getOperand(0), - Src.getOperand(1), Src.getOperand(2)); + return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Op0, Op1, Src.getOperand(2)); } if (const ConstantSDNode *Arg = dyn_cast<ConstantSDNode>(Src)) { // (ballot 0) -> 0 diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index a4d3d62e9f48..6b0653457cba 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -22109,6 +22109,11 @@ bool ARMTargetLowering::isComplexDeinterleavingOperationSupported( ScalarTy->isIntegerTy(32)); } +ArrayRef<MCPhysReg> ARMTargetLowering::getRoundingControlRegisters() const { + static const MCPhysReg RCRegs[] = {ARM::FPSCR_RM}; + return RCRegs; +} + Value *ARMTargetLowering::createComplexDeinterleavingIR( IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 357d2c5d2fad..bf3438b0d880 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -1009,6 +1009,8 @@ class VectorType; bool isUnsupportedFloatingType(EVT VT) const; + ArrayRef<MCPhysReg> getRoundingControlRegisters() const override; + SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal, SDValue ARMcc, SDValue Flags, SelectionDAG &DAG) const; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, diff --git a/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp b/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp index 3b810d0b65fa..79863e1c3cb7 100644 --- a/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp +++ b/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp @@ -34,7 +34,7 @@ class HexagonCopyHoisting : public MachineFunctionPass { public: static char ID; - HexagonCopyHoisting() : MachineFunctionPass(ID), MFN(nullptr), MRI(nullptr) {} + HexagonCopyHoisting() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "Hexagon Copy Hoisting"; } @@ -56,8 +56,8 @@ public: void moveCopyInstr(MachineBasicBlock *DestBB, std::pair<Register, Register> Key, MachineInstr *MI); - MachineFunction *MFN; - MachineRegisterInfo *MRI; + MachineFunction *MFN = nullptr; + MachineRegisterInfo *MRI = nullptr; std::vector<DenseMap<std::pair<Register, Register>, MachineInstr *>> CopyMIList; }; diff --git a/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp b/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp index 93418f7e15e8..a10c93704a85 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp @@ -34,13 +34,13 @@ STATISTIC(HexagonNumStoreAbsConversions, namespace { class HexagonGenMemAbsolute : public MachineFunctionPass { - const HexagonInstrInfo *TII; - MachineRegisterInfo *MRI; - const TargetRegisterInfo *TRI; + const HexagonInstrInfo *TII = nullptr; + MachineRegisterInfo *MRI = nullptr; + const TargetRegisterInfo *TRI = nullptr; public: static char ID; - HexagonGenMemAbsolute() : MachineFunctionPass(ID), TII(0), MRI(0), TRI(0) {} + HexagonGenMemAbsolute() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "Hexagon Generate Load/Store Set Absolute Address Instruction"; diff --git a/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp b/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp index 71bdfc6657c5..5a85f348fdaf 100644 --- a/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp @@ -43,7 +43,7 @@ namespace { class HexagonTfrCleanup : public MachineFunctionPass { public: static char ID; - HexagonTfrCleanup() : MachineFunctionPass(ID), HII(0), TRI(0) {} + HexagonTfrCleanup() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "Hexagon TFR Cleanup"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); @@ -52,8 +52,8 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; private: - const HexagonInstrInfo *HII; - const TargetRegisterInfo *TRI; + const HexagonInstrInfo *HII = nullptr; + const TargetRegisterInfo *TRI = nullptr; typedef DenseMap<unsigned, uint64_t> ImmediateMap; diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td index 690dd73014e5..e86b21cf849c 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td @@ -365,6 +365,7 @@ def : Pat<(f32 (uint_to_fp (i64 (sexti32 (i64 GPR:$src))))), // FP Rounding let Predicates = [HasBasicF, IsLA64] in { def : PatFpr<frint, FRINT_S, FPR32>; +def : PatFpr<flog2, FLOGB_S, FPR32>; } // Predicates = [HasBasicF, IsLA64] let Predicates = [HasBasicF, IsLA32] in { diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td index daefbaa52d42..2e88254aab4d 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td @@ -348,6 +348,7 @@ def : Pat<(bitconvert FPR64:$src), (MOVFR2GR_D FPR64:$src)>; // FP Rounding let Predicates = [HasBasicD, IsLA64] in { def : PatFpr<frint, FRINT_D, FPR64>; +def : PatFpr<flog2, FLOGB_D, FPR64>; } // Predicates = [HasBasicD, IsLA64] /// Pseudo-instructions needed for the soft-float ABI with LA32D diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 80c96c6dc8eb..a6de839de7c2 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -244,8 +244,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_TO_BF16, MVT::f32, Subtarget.isSoftFPABI() ? LibCall : Custom); - if (Subtarget.is64Bit()) + if (Subtarget.is64Bit()) { setOperationAction(ISD::FRINT, MVT::f32, Legal); + setOperationAction(ISD::FLOG2, MVT::f32, Legal); + } if (!Subtarget.hasBasicD()) { setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); @@ -291,8 +293,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_TO_BF16, MVT::f64, Subtarget.isSoftFPABI() ? LibCall : Custom); - if (Subtarget.is64Bit()) + if (Subtarget.is64Bit()) { setOperationAction(ISD::FRINT, MVT::f64, Legal); + setOperationAction(ISD::FLOG2, MVT::f64, Legal); + } } // Set operations for 'LSX' feature. @@ -362,6 +366,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FMA, VT, Legal); setOperationAction(ISD::FSQRT, VT, Legal); setOperationAction(ISD::FNEG, VT, Legal); + setOperationAction(ISD::FLOG2, VT, Legal); setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, ISD::SETUGE, ISD::SETUGT}, VT, Expand); @@ -443,6 +448,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FMA, VT, Legal); setOperationAction(ISD::FSQRT, VT, Legal); setOperationAction(ISD::FNEG, VT, Legal); + setOperationAction(ISD::FLOG2, VT, Legal); setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, ISD::SETUGE, ISD::SETUGT}, VT, Expand); diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 613dea6093f5..ca4ee5f89573 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1593,6 +1593,9 @@ def : Pat<(fma_nsz (fneg v4f64:$xj), v4f64:$xk, v4f64:$xa), // XVFSQRT_{S/D} defm : PatXrF<fsqrt, "XVFSQRT">; +// XVFLOGB_{S/D} +defm : PatXrF<flog2, "XVFLOGB">; + // XVRECIP_{S/D} def : Pat<(fdiv vsplatf32_fpimm_eq_1, v8f32:$xj), (XVFRECIP_S v8f32:$xj)>; @@ -2024,6 +2027,24 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)), (XVFTINTRZ_LU_D v4f64:$vj)), sub_128)>; +// XVAVG_{B/H/W/D/BU/HU/WU/DU}, XVAVGR_{B/H/W/D/BU/HU/WU/DU} +defm : VAvgPat<sra, "XVAVG_B", v32i8>; +defm : VAvgPat<sra, "XVAVG_H", v16i16>; +defm : VAvgPat<sra, "XVAVG_W", v8i32>; +defm : VAvgPat<sra, "XVAVG_D", v4i64>; +defm : VAvgPat<srl, "XVAVG_BU", v32i8>; +defm : VAvgPat<srl, "XVAVG_HU", v16i16>; +defm : VAvgPat<srl, "XVAVG_WU", v8i32>; +defm : VAvgPat<srl, "XVAVG_DU", v4i64>; +defm : VAvgrPat<sra, "XVAVGR_B", v32i8>; +defm : VAvgrPat<sra, "XVAVGR_H", v16i16>; +defm : VAvgrPat<sra, "XVAVGR_W", v8i32>; +defm : VAvgrPat<sra, "XVAVGR_D", v4i64>; +defm : VAvgrPat<srl, "XVAVGR_BU", v32i8>; +defm : VAvgrPat<srl, "XVAVGR_HU", v16i16>; +defm : VAvgrPat<srl, "XVAVGR_WU", v8i32>; +defm : VAvgrPat<srl, "XVAVGR_DU", v4i64>; + // abs def : Pat<(abs v32i8:$xj), (XVSIGNCOV_B v32i8:$xj, v32i8:$xj)>; def : Pat<(abs v16i16:$xj), (XVSIGNCOV_H v16i16:$xj, v16i16:$xj)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 4619c6bd248a..92402baa0fa0 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -1518,6 +1518,18 @@ multiclass InsertExtractPatV2<ValueType vecty, ValueType elemty> { } } +multiclass VAvgPat<SDPatternOperator OpNode, string Inst, ValueType vt> { + def : Pat<(OpNode (vt (add vt:$vj, vt:$vk)), (vt (vsplat_imm_eq_1))), + (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>; +} + +multiclass VAvgrPat<SDPatternOperator OpNode, string Inst, ValueType vt> { + def : Pat<(OpNode (vt (add (vt (add vt:$vj, vt:$vk)), + (vt (vsplat_imm_eq_1)))), + (vt (vsplat_imm_eq_1))), + (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>; +} + let Predicates = [HasExtLSX] in { // VADD_{B/H/W/D} @@ -1783,6 +1795,9 @@ def : Pat<(fma_nsz (fneg v2f64:$vj), v2f64:$vk, v2f64:$va), // VFSQRT_{S/D} defm : PatVrF<fsqrt, "VFSQRT">; +// VFLOGB_{S/D} +defm : PatVrF<flog2, "VFLOGB">; + // VFRECIP_{S/D} def : Pat<(fdiv vsplatf32_fpimm_eq_1, v4f32:$vj), (VFRECIP_S v4f32:$vj)>; @@ -2154,6 +2169,24 @@ def : Pat<(f32 f32imm_vldi:$in), def : Pat<(f64 f64imm_vldi:$in), (f64 (EXTRACT_SUBREG (VLDI (to_f64imm_vldi f64imm_vldi:$in)), sub_64))>; +// VAVG_{B/H/W/D/BU/HU/WU/DU}, VAVGR_{B/H/W/D/BU/HU/WU/DU} +defm : VAvgPat<sra, "VAVG_B", v16i8>; +defm : VAvgPat<sra, "VAVG_H", v8i16>; +defm : VAvgPat<sra, "VAVG_W", v4i32>; +defm : VAvgPat<sra, "VAVG_D", v2i64>; +defm : VAvgPat<srl, "VAVG_BU", v16i8>; +defm : VAvgPat<srl, "VAVG_HU", v8i16>; +defm : VAvgPat<srl, "VAVG_WU", v4i32>; +defm : VAvgPat<srl, "VAVG_DU", v2i64>; +defm : VAvgrPat<sra, "VAVGR_B", v16i8>; +defm : VAvgrPat<sra, "VAVGR_H", v8i16>; +defm : VAvgrPat<sra, "VAVGR_W", v4i32>; +defm : VAvgrPat<sra, "VAVGR_D", v2i64>; +defm : VAvgrPat<srl, "VAVGR_BU", v16i8>; +defm : VAvgrPat<srl, "VAVGR_HU", v8i16>; +defm : VAvgrPat<srl, "VAVGR_WU", v4i32>; +defm : VAvgrPat<srl, "VAVGR_DU", v2i64>; + // abs def : Pat<(abs v16i8:$vj), (VSIGNCOV_B v16i8:$vj, v16i8:$vj)>; def : Pat<(abs v8i16:$vj), (VSIGNCOV_H v8i16:$vj, v8i16:$vj)>; diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 9a6afa1cd4ea..b25a05400fe3 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3995,6 +3995,7 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits, case RISCV::CTZW: case RISCV::CPOPW: case RISCV::SLLI_UW: + case RISCV::ABSW: case RISCV::FMV_W_X: case RISCV::FCVT_H_W: case RISCV::FCVT_H_W_INX: diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 1c930acd9c4a..56881f71934c 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -433,6 +433,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (Subtarget.hasStdExtP() || (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) { setOperationAction(ISD::ABS, XLenVT, Legal); + if (Subtarget.is64Bit()) + setOperationAction(ISD::ABS, MVT::i32, Custom); } else if (Subtarget.hasShortForwardBranchOpt()) { // We can use PseudoCCSUB to implement ABS. setOperationAction(ISD::ABS, XLenVT, Legal); @@ -14816,8 +14818,16 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); + if (Subtarget.hasStdExtP()) { + SDValue Src = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); + SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs)); + return; + } + if (Subtarget.hasStdExtZbb()) { - // Emit a special ABSW node that will be expanded to NEGW+MAX at isel. + // Emit a special node that will be expanded to NEGW+MAX at isel. // This allows us to remember that the result is sign extended. Expanding // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits. SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, @@ -20290,6 +20300,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, break; } + case RISCVISD::ABSW: case RISCVISD::CLZW: case RISCVISD::CTZW: { // Only the lower 32 bits of the first operand are read @@ -21862,6 +21873,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( case RISCVISD::REMUW: case RISCVISD::ROLW: case RISCVISD::RORW: + case RISCVISD::ABSW: case RISCVISD::FCVT_W_RV64: case RISCVISD::FCVT_WU_RV64: case RISCVISD::STRICT_FCVT_W_RV64: diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td index cc085bb6c9fd..4cbbba3aa68c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td @@ -1461,5 +1461,10 @@ let Predicates = [HasStdExtP, IsRV32] in { // Codegen patterns //===----------------------------------------------------------------------===// +def riscv_absw : RVSDNode<"ABSW", SDTIntUnaryOp>; + let Predicates = [HasStdExtP] in def : PatGpr<abs, ABS>; + +let Predicates = [HasStdExtP, IsRV64] in +def : PatGpr<riscv_absw, ABSW>; diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp index d08115b72977..ea98cdb4a1e6 100644 --- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp +++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp @@ -172,6 +172,7 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI, case RISCV::CTZW: case RISCV::CPOPW: case RISCV::SLLI_UW: + case RISCV::ABSW: case RISCV::FMV_W_X: case RISCV::FCVT_H_W: case RISCV::FCVT_H_W_INX: diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 7795cce9d9d3..b5548d4f24a2 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -69,14 +69,6 @@ namespace llvm { // Command line option to enable vtable value profiling. Defined in // ProfileData/InstrProf.cpp: -enable-vtable-value-profiling= extern cl::opt<bool> EnableVTableValueProfiling; -// TODO: Remove -debug-info-correlate in next LLVM release, in favor of -// -profile-correlate=debug-info. -cl::opt<bool> DebugInfoCorrelate( - "debug-info-correlate", - cl::desc("Use debug info to correlate profiles. (Deprecated, use " - "-profile-correlate=debug-info)"), - cl::init(false)); - LLVM_ABI cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate( "profile-correlate", cl::desc("Use debug info or binary file to correlate profiles."), @@ -1047,7 +1039,7 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { // in lightweight mode. We need to move the value profile pointer to the // Counter struct to get this working. assert( - !DebugInfoCorrelate && ProfileCorrelate == InstrProfCorrelator::NONE && + ProfileCorrelate == InstrProfCorrelator::NONE && "Value profiling is not yet supported with lightweight instrumentation"); GlobalVariable *Name = Ind->getName(); auto It = ProfileDataMap.find(Name); @@ -1504,7 +1496,7 @@ static inline Constant *getVTableAddrForProfData(GlobalVariable *GV) { } void InstrLowerer::getOrCreateVTableProfData(GlobalVariable *GV) { - assert(!DebugInfoCorrelate && + assert(ProfileCorrelate != InstrProfCorrelator::DEBUG_INFO && "Value profiling is not supported with lightweight instrumentation"); if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) return; @@ -1584,8 +1576,7 @@ GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc, // Use internal rather than private linkage so the counter variable shows up // in the symbol table when using debug info for correlation. - if ((DebugInfoCorrelate || - ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) && + if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO && TT.isOSBinFormatMachO() && Linkage == GlobalValue::PrivateLinkage) Linkage = GlobalValue::InternalLinkage; @@ -1691,8 +1682,7 @@ InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) { auto *CounterPtr = setupProfileSection(Inc, IPSK_cnts); PD.RegionCounters = CounterPtr; - if (DebugInfoCorrelate || - ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) { + if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) { LLVMContext &Ctx = M.getContext(); Function *Fn = Inc->getParent()->getParent(); if (auto *SP = Fn->getSubprogram()) { @@ -1737,7 +1727,7 @@ InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) { void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) { // When debug information is correlated to profile data, a data variable // is not needed. - if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) + if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) return; GlobalVariable *NamePtr = Inc->getName(); diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 71736cfa4d89..af53fa0bae46 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -456,7 +456,7 @@ createIRLevelProfileFlagVar(Module &M, ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; if (PGOInstrumentLoopEntries) ProfileVersion |= VARIANT_MASK_INSTR_LOOP_ENTRIES; - if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) + if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; if (PGOFunctionEntryCoverage) ProfileVersion |= diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 5af6c96c56a0..bb6c879f4d47 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -81,6 +81,7 @@ STATISTIC( STATISTIC(NumInvariantConditionsInjected, "Number of invariant conditions injected and unswitched"); +namespace llvm { static cl::opt<bool> EnableNonTrivialUnswitch( "enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " @@ -131,11 +132,17 @@ static cl::opt<bool> InjectInvariantConditions( static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold( "simple-loop-unswitch-inject-invariant-condition-hotness-threshold", - cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " - "unswitch on them to eliminate branches that are " - "not-taken 1/<this option> times or less."), + cl::Hidden, + cl::desc("Only try to inject loop invariant conditions and " + "unswitch on them to eliminate branches that are " + "not-taken 1/<this option> times or less."), cl::init(16)); +static cl::opt<bool> EstimateProfile("simple-loop-unswitch-estimate-profile", + cl::Hidden, cl::init(true)); +extern cl::opt<bool> ProfcheckDisableMetadataFixes; +} // namespace llvm + AnalysisKey ShouldRunExtraSimpleLoopUnswitch::Key; namespace { struct CompareDesc { @@ -268,13 +275,42 @@ static bool areLoopExitPHIsLoopInvariant(const Loop &L, llvm_unreachable("Basic blocks should never be empty!"); } -/// Copy a set of loop invariant values \p ToDuplicate and insert them at the +/// Copy a set of loop invariant values \p Invariants and insert them at the /// end of \p BB and conditionally branch on the copied condition. We only /// branch on a single value. +/// We attempt to estimate the profile of the resulting conditional branch from +/// \p ComputeProfFrom, which is the original conditional branch we're +/// unswitching. +/// When \p Direction is true, the \p Invariants form a disjunction, and the +/// branch conditioned on it exits the loop on the "true" case. When \p +/// Direction is false, the \p Invariants form a conjunction and the branch +/// exits on the "false" case. static void buildPartialUnswitchConditionalBranch( BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, - const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) { + const Instruction *I, AssumptionCache *AC, const DominatorTree &DT, + const BranchInst &ComputeProfFrom) { + + SmallVector<uint32_t> BranchWeights; + bool HasBranchWeights = EstimateProfile && !ProfcheckDisableMetadataFixes && + extractBranchWeights(ComputeProfFrom, BranchWeights); + // If Direction is true, that means we had a disjunction and that the "true" + // case exits. The probability of the disjunction of the subset of terms is at + // most as high as the original one. So, if the probability is higher than the + // one we'd assign in absence of a profile (i.e. 0.5), we will use 0.5, + // but if it's lower, we will use the original probability. + // Conversely, if Direction is false, that means we had a conjunction, and the + // probability of exiting is captured in the second branch weight. That + // probability is a disjunction (of the negation of the original terms). The + // same reasoning applies as above. + // Issue #165649: should we expect BFI to conserve, and use that to calculate + // the branch weights? + if (HasBranchWeights && + static_cast<double>(BranchWeights[Direction ? 0 : 1]) / + static_cast<double>(sum_of(BranchWeights)) > + 0.5) + HasBranchWeights = false; + IRBuilder<> IRB(&BB); IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated()); @@ -287,8 +323,14 @@ static void buildPartialUnswitchConditionalBranch( Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants) : IRB.CreateAnd(FrozenInvariants); - IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc, - Direction ? &NormalSucc : &UnswitchedSucc); + auto *BR = IRB.CreateCondBr( + Cond, Direction ? &UnswitchedSucc : &NormalSucc, + Direction ? &NormalSucc : &UnswitchedSucc, + HasBranchWeights ? ComputeProfFrom.getMetadata(LLVMContext::MD_prof) + : nullptr); + if (!HasBranchWeights) + setExplicitlyUnknownBranchWeightsIfProfiled( + *BR, *BR->getParent()->getParent(), DEBUG_TYPE); } /// Copy a set of loop invariant values, and conditionally branch on them. @@ -658,7 +700,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, " condition!"); buildPartialUnswitchConditionalBranch( *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH, - FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT); + FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT, BI); } // Update the dominator tree with the added edge. @@ -2477,7 +2519,7 @@ static void unswitchNontrivialInvariants( else { buildPartialUnswitchConditionalBranch( *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, - FreezeLoopUnswitchCond, BI, &AC, DT); + FreezeLoopUnswitchCond, BI, &AC, DT, *BI); } DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index b03fb6213d61..7f6d779687e9 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5977,14 +5977,14 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI, } // Prune obsolete incoming values off the successors' PHI nodes. - for (auto BBI = Dest->begin(); isa<PHINode>(BBI); ++BBI) { + for (auto &PHI : make_early_inc_range(Dest->phis())) { unsigned PreviousEdges = Cases->size(); if (Dest == SI->getDefaultDest()) ++PreviousEdges; for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I) - cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); + PHI.removeIncomingValue(SI->getParent()); } - for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) { + for (auto &PHI : make_early_inc_range(OtherDest->phis())) { unsigned PreviousEdges = OtherCases->size(); if (OtherDest == SI->getDefaultDest()) ++PreviousEdges; @@ -5993,7 +5993,7 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI, if (NewBI->isUnconditional()) ++E; for (unsigned I = 0; I != E; ++I) - cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); + PHI.removeIncomingValue(SI->getParent()); } // Clean up the default block - it may have phis or other instructions before diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 8ebc10808027..25bf49db0e07 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3908,7 +3908,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks( continue; VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind, - *CM.PSE.getSE()); + *CM.PSE.getSE(), OrigLoop); precomputeCosts(*Plan, VF, CostCtx); auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry()); for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) { @@ -4166,7 +4166,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() { // Add on other costs that are modelled in VPlan, but not in the legacy // cost model. VPCostContext CostCtx(CM.TTI, *CM.TLI, *P, CM, CM.CostKind, - *CM.PSE.getSE()); + *CM.PSE.getSE(), OrigLoop); VPRegionBlock *VectorRegion = P->getVectorLoopRegion(); assert(VectorRegion && "Expected to have a vector region!"); for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>( @@ -6876,7 +6876,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF, InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan, ElementCount VF) const { - VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE()); + VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE(), + OrigLoop); InstructionCost Cost = precomputeCosts(Plan, VF, CostCtx); // Now compute and add the VPlan-based cost. @@ -7110,12 +7111,13 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() { // case, don't trigger the assertion, as the extra simplifications may cause a // different VF to be picked by the VPlan-based cost model. VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind, - *CM.PSE.getSE()); + *CM.PSE.getSE(), OrigLoop); precomputeCosts(BestPlan, BestFactor.Width, CostCtx); // Verify that the VPlan-based and legacy cost models agree, except for VPlans // with early exits and plans with additional VPlan simplifications. The // legacy cost model doesn't properly model costs for such loops. assert((BestFactor.Width == LegacyVF.Width || BestPlan.hasEarlyExit() || + !Legal->getLAI()->getSymbolicStrides().empty() || planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width), CostCtx, OrigLoop, BestFactor.Width) || @@ -8340,11 +8342,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( &R) || (isa<VPInstruction>(&R) && !UnderlyingValue)) continue; - - // FIXME: VPlan0, which models a copy of the original scalar loop, should - // not use VPWidenPHIRecipe to model the phis. - assert((isa<VPWidenPHIRecipe>(&R) || isa<VPInstruction>(&R)) && - UnderlyingValue && "unsupported recipe"); + assert(isa<VPInstruction>(&R) && UnderlyingValue && "unsupported recipe"); // TODO: Gradually replace uses of underlying instruction by analyses on // VPlan. @@ -8445,7 +8443,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( // and mulacc-reduction are implemented. if (!CM.foldTailWithEVL()) { VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind, - *CM.PSE.getSE()); + *CM.PSE.getSE(), OrigLoop); VPlanTransforms::runPass(VPlanTransforms::convertToAbstractRecipes, *Plan, CostCtx, Range); } @@ -9915,7 +9913,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { bool ForceVectorization = Hints.getForce() == LoopVectorizeHints::FK_Enabled; VPCostContext CostCtx(CM.TTI, *CM.TLI, LVP.getPlanFor(VF.Width), CM, - CM.CostKind, *CM.PSE.getSE()); + CM.CostKind, *CM.PSE.getSE(), L); if (!ForceVectorization && !isOutsideLoopWorkProfitable(Checks, VF, L, PSE, CostCtx, LVP.getPlanFor(VF.Width), SEL, diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h index 2aaabd9ebdd0..965426f86ff2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h +++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h @@ -350,13 +350,14 @@ struct VPCostContext { SmallPtrSet<Instruction *, 8> SkipCostComputation; TargetTransformInfo::TargetCostKind CostKind; ScalarEvolution &SE; + const Loop *L; VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI, const VPlan &Plan, LoopVectorizationCostModel &CM, TargetTransformInfo::TargetCostKind CostKind, - ScalarEvolution &SE) + ScalarEvolution &SE, const Loop *L) : TTI(TTI), TLI(TLI), Types(Plan), LLVMCtx(Plan.getContext()), CM(CM), - CostKind(CostKind), SE(SE) {} + CostKind(CostKind), SE(SE), L(L) {} /// Return the cost for \p UI with \p VF using the legacy cost model as /// fallback until computing the cost of all recipes migrates to VPlan. diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 9a63c802047e..bde62dd6dd4b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3167,26 +3167,30 @@ bool VPReplicateRecipe::shouldPack() const { }); } -/// Returns true if \p Ptr is a pointer computation for which the legacy cost -/// model computes a SCEV expression when computing the address cost. -static bool shouldUseAddressAccessSCEV(const VPValue *Ptr) { +/// Returns a SCEV expression for \p Ptr if it is a pointer computation for +/// which the legacy cost model computes a SCEV expression when computing the +/// address cost. Computing SCEVs for VPValues is incomplete and returns +/// SCEVCouldNotCompute in cases the legacy cost model can compute SCEVs. In +/// those cases we fall back to the legacy cost model. Otherwise return nullptr. +static const SCEV *getAddressAccessSCEV(const VPValue *Ptr, ScalarEvolution &SE, + const Loop *L) { auto *PtrR = Ptr->getDefiningRecipe(); if (!PtrR || !((isa<VPReplicateRecipe>(PtrR) && cast<VPReplicateRecipe>(PtrR)->getOpcode() == Instruction::GetElementPtr) || isa<VPWidenGEPRecipe>(PtrR) || match(Ptr, m_GetElementPtr(m_VPValue(), m_VPValue())))) - return false; + return nullptr; // We are looking for a GEP where all indices are either loop invariant or // inductions. for (VPValue *Opd : drop_begin(PtrR->operands())) { if (!Opd->isDefinedOutsideLoopRegions() && !isa<VPScalarIVStepsRecipe, VPWidenIntOrFpInductionRecipe>(Opd)) - return false; + return nullptr; } - return true; + return vputils::getSCEVExprForVPValue(Ptr, SE, L); } /// Returns true if \p V is used as part of the address of another load or @@ -3354,9 +3358,8 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, bool IsLoad = UI->getOpcode() == Instruction::Load; const VPValue *PtrOp = getOperand(!IsLoad); - // TODO: Handle cases where we need to pass a SCEV to - // getAddressComputationCost. - if (shouldUseAddressAccessSCEV(PtrOp)) + const SCEV *PtrSCEV = getAddressAccessSCEV(PtrOp, Ctx.SE, Ctx.L); + if (isa_and_nonnull<SCEVCouldNotCompute>(PtrSCEV)) break; Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0)); @@ -3374,7 +3377,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, InstructionCost ScalarCost = ScalarMemOpCost + Ctx.TTI.getAddressComputationCost( PtrTy, UsedByLoadStoreAddress ? nullptr : &Ctx.SE, - nullptr, Ctx.CostKind); + PtrSCEV, Ctx.CostKind); if (isSingleScalar()) return ScalarCost; diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index 4db92e7def3e..8c23e78693db 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -75,7 +75,8 @@ bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) { B == Plan.getBackedgeTakenCount(); } -const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) { +const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V, + ScalarEvolution &SE, const Loop *L) { if (V->isLiveIn()) { if (Value *LiveIn = V->getLiveInIRValue()) return SE.getSCEV(LiveIn); @@ -86,6 +87,53 @@ const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) { return TypeSwitch<const VPRecipeBase *, const SCEV *>(V->getDefiningRecipe()) .Case<VPExpandSCEVRecipe>( [](const VPExpandSCEVRecipe *R) { return R->getSCEV(); }) + .Case<VPCanonicalIVPHIRecipe>([&SE, L](const VPCanonicalIVPHIRecipe *R) { + if (!L) + return SE.getCouldNotCompute(); + const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), SE, L); + return SE.getAddRecExpr(Start, SE.getOne(Start->getType()), L, + SCEV::FlagAnyWrap); + }) + .Case<VPDerivedIVRecipe>([&SE, L](const VPDerivedIVRecipe *R) { + const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), SE, L); + const SCEV *IV = getSCEVExprForVPValue(R->getOperand(1), SE, L); + const SCEV *Scale = getSCEVExprForVPValue(R->getOperand(2), SE, L); + if (any_of(ArrayRef({Start, IV, Scale}), IsaPred<SCEVCouldNotCompute>)) + return SE.getCouldNotCompute(); + + return SE.getAddExpr(SE.getTruncateOrSignExtend(Start, IV->getType()), + SE.getMulExpr(IV, SE.getTruncateOrSignExtend( + Scale, IV->getType()))); + }) + .Case<VPScalarIVStepsRecipe>([&SE, L](const VPScalarIVStepsRecipe *R) { + const SCEV *IV = getSCEVExprForVPValue(R->getOperand(0), SE, L); + const SCEV *Step = getSCEVExprForVPValue(R->getOperand(1), SE, L); + if (isa<SCEVCouldNotCompute>(IV) || isa<SCEVCouldNotCompute>(Step) || + !Step->isOne()) + return SE.getCouldNotCompute(); + return SE.getMulExpr(SE.getTruncateOrSignExtend(IV, Step->getType()), + Step); + }) + .Case<VPReplicateRecipe>([&SE, L](const VPReplicateRecipe *R) { + if (R->getOpcode() != Instruction::GetElementPtr) + return SE.getCouldNotCompute(); + + const SCEV *Base = getSCEVExprForVPValue(R->getOperand(0), SE, L); + if (isa<SCEVCouldNotCompute>(Base)) + return SE.getCouldNotCompute(); + + SmallVector<const SCEV *> IndexExprs; + for (VPValue *Index : drop_begin(R->operands())) { + const SCEV *IndexExpr = getSCEVExprForVPValue(Index, SE, L); + if (isa<SCEVCouldNotCompute>(IndexExpr)) + return SE.getCouldNotCompute(); + IndexExprs.push_back(IndexExpr); + } + + Type *SrcElementTy = cast<GetElementPtrInst>(R->getUnderlyingInstr()) + ->getSourceElementType(); + return SE.getGEPExpr(Base, IndexExprs, SrcElementTy); + }) .Default([&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); }); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h index 37cd413da907..c21a0e70c139 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h @@ -37,7 +37,8 @@ VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr); /// Return the SCEV expression for \p V. Returns SCEVCouldNotCompute if no /// SCEV expression could be constructed. -const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE); +const SCEV *getSCEVExprForVPValue(const VPValue *V, ScalarEvolution &SE, + const Loop *L = nullptr); /// Returns true if \p VPV is a single scalar, either because it produces the /// same value for all lanes or only has its first lane used. diff --git a/llvm/test/Analysis/DependenceAnalysis/GCD.ll b/llvm/test/Analysis/DependenceAnalysis/GCD.ll index 03343e7a9821..cb14d189afe4 100644 --- a/llvm/test/Analysis/DependenceAnalysis/GCD.ll +++ b/llvm/test/Analysis/DependenceAnalysis/GCD.ll @@ -254,7 +254,7 @@ define void @gcd4(ptr %A, ptr %B, i64 %M, i64 %N) nounwind uwtable ssp { ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4 ; CHECK-NEXT: da analyze - output [* *]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx16, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - flow [* *|<]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx16, align 4 --> Dst: %0 = load i32, ptr %arrayidx16, align 4 @@ -322,7 +322,7 @@ define void @gcd5(ptr %A, ptr %B, i64 %M, i64 %N) nounwind uwtable ssp { ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4 ; CHECK-NEXT: da analyze - output [* *]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx16, align 4 -; CHECK-NEXT: da analyze - flow [<> *]! +; CHECK-NEXT: da analyze - flow [* *|<]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx16, align 4 --> Dst: %0 = load i32, ptr %arrayidx16, align 4 @@ -390,7 +390,7 @@ define void @gcd6(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp { ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store i32 %conv, ptr %arrayidx5, align 4 ; CHECK-NEXT: da analyze - output [* *]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: %2 = load i32, ptr %arrayidx9, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - flow [* *|<]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store i32 %2, ptr %B.addr.12, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %2 = load i32, ptr %arrayidx9, align 4 --> Dst: %2 = load i32, ptr %arrayidx9, align 4 diff --git a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll index cdfaec76fa89..73a415baef4c 100644 --- a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll +++ b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll @@ -384,7 +384,7 @@ define void @symbolicsiv6(ptr %A, ptr %B, i64 %n, i64 %N, i64 %M) nounwind uwtab ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx7, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - flow [*|<]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx7, align 4 --> Dst: %0 = load i32, ptr %arrayidx7, align 4 @@ -440,7 +440,7 @@ define void @symbolicsiv7(ptr %A, ptr %B, i64 %n, i64 %N, i64 %M) nounwind uwtab ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %1 = load i32, ptr %arrayidx6, align 4 -; CHECK-NEXT: da analyze - flow [<>]! +; CHECK-NEXT: da analyze - flow [*|<]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %1, ptr %B.addr.02, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %1 = load i32, ptr %arrayidx6, align 4 --> Dst: %1 = load i32, ptr %arrayidx6, align 4 diff --git a/llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll b/llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll index 64fad37ab699..783150af2cd1 100644 --- a/llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll +++ b/llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll @@ -18,7 +18,7 @@ define void @unknown_sign(ptr %a, i64 %k) { ; CHECK-NEXT: Src: store i8 1, ptr %idx.0, align 1 --> Dst: store i8 1, ptr %idx.0, align 1 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: store i8 1, ptr %idx.0, align 1 --> Dst: store i8 2, ptr %idx.1, align 1 -; CHECK-NEXT: da analyze - output [<>]! +; CHECK-NEXT: da analyze - output [*|<]! ; CHECK-NEXT: Src: store i8 2, ptr %idx.1, align 1 --> Dst: store i8 2, ptr %idx.1, align 1 ; CHECK-NEXT: da analyze - none! ; diff --git a/llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll b/llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll index 43f66dd7d097..9169ac323d83 100644 --- a/llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll +++ b/llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll @@ -13,23 +13,20 @@ ; offset1 += 3; ; } ; -; FIXME: DependenceAnalysis currently detects no dependency between the two -; stores, but it does exist. E.g., consider `m` is 12297829382473034411, which -; is a modular multiplicative inverse of 3 under modulo 2^64. Then `offset0` is -; effectively `i + 4`, so accesses will be as follows: +; Dependency exists between the two stores. E.g., consider `m` is +; 12297829382473034411, which is a modular multiplicative inverse of 3 under +; modulo 2^64. Then `offset0` is effectively `i + 4`, so accesses will be as +; follows: ; ; - A[offset0] : A[4], A[5], A[6], ... ; - A[offset1] : A[0], A[3], A[6], ... ; -; The root cause is that DA interprets `3*m` in non-modular arithmetic, which -; isn't necessarily true due to overflow. -; define void @gcdmiv_coef_ovfl(ptr %A, i64 %m) { ; CHECK-ALL-LABEL: 'gcdmiv_coef_ovfl' ; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1 ; CHECK-ALL-NEXT: da analyze - none! ; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 -; CHECK-ALL-NEXT: da analyze - none! +; CHECK-ALL-NEXT: da analyze - output [*|<]! ; CHECK-ALL-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 ; CHECK-ALL-NEXT: da analyze - none! ; @@ -37,7 +34,7 @@ define void @gcdmiv_coef_ovfl(ptr %A, i64 %m) { ; CHECK-GCD-MIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1 ; CHECK-GCD-MIV-NEXT: da analyze - consistent output [*]! ; CHECK-GCD-MIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 -; CHECK-GCD-MIV-NEXT: da analyze - none! +; CHECK-GCD-MIV-NEXT: da analyze - consistent output [*|<]! ; CHECK-GCD-MIV-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 ; CHECK-GCD-MIV-NEXT: da analyze - consistent output [*]! ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll new file mode 100644 index 000000000000..e11720011af1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll @@ -0,0 +1,612 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX12 %s + +define i16 @s_add_i16(i16 inreg %a, i16 inreg %b) { +; GFX7-LABEL: s_add_i16: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_add_i32 s16, s16, s17 +; GFX7-NEXT: v_mov_b32_e32 v0, s16 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_add_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_add_i32 s16, s16, s17 +; GFX9-NEXT: v_mov_b32_e32 v0, s16 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_add_i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_add_i32 s16, s16, s17 +; GFX8-NEXT: v_mov_b32_e32 v0, s16 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_add_i16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_add_i32 s16, s16, s17 +; GFX10-NEXT: v_mov_b32_e32 v0, s16 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_add_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_add_i32 s0, s0, s1 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_add_i16: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_add_co_i32 s0, s0, s1 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = add i16 %a, %b + ret i16 %c +} + +define i16 @v_add_i16(i16 %a, i16 %b) { +; GFX7-LABEL: v_add_i16: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_add_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_u16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_add_i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_add_u16_e32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_add_i16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_add_nc_u16 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_add_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_nc_u16 v0.l, v0.l, v1.l +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_add_i16: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_add_nc_u16 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = add i16 %a, %b + ret i16 %c +} + +define i32 @s_add_i32(i32 inreg %a, i32 inreg %b) { +; GFX7-LABEL: s_add_i32: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_add_i32 s16, s16, s17 +; GFX7-NEXT: v_mov_b32_e32 v0, s16 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_add_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_add_i32 s16, s16, s17 +; GFX9-NEXT: v_mov_b32_e32 v0, s16 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_add_i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_add_i32 s16, s16, s17 +; GFX8-NEXT: v_mov_b32_e32 v0, s16 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_add_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_add_i32 s16, s16, s17 +; GFX10-NEXT: v_mov_b32_e32 v0, s16 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_add_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_add_i32 s0, s0, s1 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_add_i32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_add_co_i32 s0, s0, s1 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = add i32 %a, %b + ret i32 %c +} + +define i32 @v_add_i32(i32 %a, i32 %b) { +; GFX7-LABEL: v_add_i32: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_add_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_add_i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_add_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_add_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_add_i32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = add i32 %a, %b + ret i32 %c +} + +define <2 x i16> @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) { +; GFX7-LABEL: s_add_v2i16: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_add_i32 s16, s16, s18 +; GFX7-NEXT: s_add_i32 s17, s17, s19 +; GFX7-NEXT: v_mov_b32_e32 v0, s16 +; GFX7-NEXT: v_mov_b32_e32 v1, s17 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_add_v2i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_lshr_b32 s4, s16, 16 +; GFX9-NEXT: s_lshr_b32 s5, s17, 16 +; GFX9-NEXT: s_add_i32 s16, s16, s17 +; GFX9-NEXT: s_add_i32 s4, s4, s5 +; GFX9-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_add_v2i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_lshr_b32 s4, s16, 16 +; GFX8-NEXT: s_lshr_b32 s5, s17, 16 +; GFX8-NEXT: s_add_i32 s4, s4, s5 +; GFX8-NEXT: s_add_i32 s16, s16, s17 +; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 +; GFX8-NEXT: s_and_b32 s5, 0xffff, s16 +; GFX8-NEXT: s_lshl_b32 s4, s4, 16 +; GFX8-NEXT: s_or_b32 s4, s5, s4 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_add_v2i16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_lshr_b32 s4, s16, 16 +; GFX10-NEXT: s_lshr_b32 s5, s17, 16 +; GFX10-NEXT: s_add_i32 s16, s16, s17 +; GFX10-NEXT: s_add_i32 s4, s4, s5 +; GFX10-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_add_v2i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_lshr_b32 s2, s0, 16 +; GFX11-NEXT: s_lshr_b32 s3, s1, 16 +; GFX11-NEXT: s_add_i32 s0, s0, s1 +; GFX11-NEXT: s_add_i32 s2, s2, s3 +; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s2 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_add_v2i16: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_lshr_b32 s2, s0, 16 +; GFX12-NEXT: s_lshr_b32 s3, s1, 16 +; GFX12-NEXT: s_add_co_i32 s0, s0, s1 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_add_co_i32 s2, s2, s3 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_pack_ll_b32_b16 s0, s0, s2 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = add <2 x i16> %a, %b + ret <2 x i16> %c +} + +define <2 x i16> @v_add_v2i16(<2 x i16> %a, <2 x i16> %b) { +; GFX7-LABEL: v_add_v2i16: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_add_v2i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_add_u16 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_add_v2i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_add_u16_e32 v2, v0, v1 +; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_add_v2i16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_add_v2i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_pk_add_u16 v0, v0, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_add_v2i16: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_add_u16 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = add <2 x i16> %a, %b + ret <2 x i16> %c +} + +define i64 @s_add_i64(i64 inreg %a, i64 inreg %b) { +; GFX7-LABEL: s_add_i64: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_add_u32 s4, s16, s18 +; GFX7-NEXT: s_addc_u32 s5, s17, s19 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_add_i64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_add_u32 s4, s16, s18 +; GFX9-NEXT: s_addc_u32 s5, s17, s19 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_add_i64: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_add_u32 s4, s16, s18 +; GFX8-NEXT: s_addc_u32 s5, s17, s19 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-NEXT: v_mov_b32_e32 v1, s5 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_add_i64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s4, s16, s18 +; GFX10-NEXT: s_addc_u32 s5, s17, s19 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: v_mov_b32_e32 v1, s5 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_add_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_add_u32 s0, s0, s2 +; GFX11-NEXT: s_addc_u32 s1, s1, s3 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_add_i64: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3] +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = add i64 %a, %b + ret i64 %c +} + +define i64 @v_add_i64(i64 %a, i64 %b) { +; GFX7-LABEL: v_add_i64: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_add_i64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_add_i64: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_add_i64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_add_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_add_i64: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX12-NEXT: s_wait_alu 0xfffd +; GFX12-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = add i64 %a, %b + ret i64 %c +} + +define void @s_uaddo_uadde(i64 inreg %a, i64 inreg %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) { +; GFX7-LABEL: s_uaddo_uadde: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_add_u32 s4, s16, s18 +; GFX7-NEXT: s_addc_u32 s5, s17, s19 +; GFX7-NEXT: v_mov_b32_e32 v4, s4 +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_cselect_b32 s8, 1, 0 +; GFX7-NEXT: v_mov_b32_e32 v5, s5 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: v_mov_b32_e32 v0, s8 +; GFX7-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_uaddo_uadde: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_add_u32 s4, s16, s18 +; GFX9-NEXT: s_addc_u32 s5, s17, s19 +; GFX9-NEXT: v_mov_b32_e32 v4, s4 +; GFX9-NEXT: s_cselect_b32 s6, 1, 0 +; GFX9-NEXT: v_mov_b32_e32 v5, s5 +; GFX9-NEXT: global_store_dwordx2 v[0:1], v[4:5], off +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: global_store_dword v[2:3], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_uaddo_uadde: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_add_u32 s4, s16, s18 +; GFX8-NEXT: s_addc_u32 s5, s17, s19 +; GFX8-NEXT: v_mov_b32_e32 v4, s4 +; GFX8-NEXT: s_cselect_b32 s6, 1, 0 +; GFX8-NEXT: v_mov_b32_e32 v5, s5 +; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[4:5] +; GFX8-NEXT: v_mov_b32_e32 v0, s6 +; GFX8-NEXT: flat_store_dword v[2:3], v0 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_uaddo_uadde: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s4, s16, s18 +; GFX10-NEXT: s_addc_u32 s5, s17, s19 +; GFX10-NEXT: s_cselect_b32 s6, 1, 0 +; GFX10-NEXT: v_mov_b32_e32 v4, s4 +; GFX10-NEXT: v_mov_b32_e32 v5, s5 +; GFX10-NEXT: v_mov_b32_e32 v6, s6 +; GFX10-NEXT: global_store_dwordx2 v[0:1], v[4:5], off +; GFX10-NEXT: global_store_dword v[2:3], v6, off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_uaddo_uadde: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_add_u32 s0, s0, s2 +; GFX11-NEXT: s_addc_u32 s1, s1, s3 +; GFX11-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11-NEXT: v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0 +; GFX11-NEXT: v_mov_b32_e32 v6, s2 +; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off +; GFX11-NEXT: global_store_b32 v[2:3], v6, off +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_uaddo_uadde: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_add_co_u32 s0, s0, s2 +; GFX12-NEXT: s_add_co_ci_u32 s1, s1, s3 +; GFX12-NEXT: s_cselect_b32 s2, 1, 0 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0 +; GFX12-NEXT: v_mov_b32_e32 v6, s2 +; GFX12-NEXT: global_store_b64 v[0:1], v[4:5], off +; GFX12-NEXT: global_store_b32 v[2:3], v6, off +; GFX12-NEXT: s_setpc_b64 s[30:31] + %uaddo = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %add = extractvalue {i64, i1} %uaddo, 0 + %of = extractvalue {i64, i1} %uaddo, 1 + %of32 = select i1 %of, i32 1, i32 0 + store i64 %add, ptr addrspace(1) %res + store i32 %of32, ptr addrspace(1) %carry + ret void +} + +define void @v_uaddo_uadde(i64 %a, i64 %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) { +; GFX7-LABEL: v_uaddo_uadde: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX7-NEXT: buffer_store_dwordx2 v[0:1], v[4:5], s[4:7], 0 addr64 +; GFX7-NEXT: buffer_store_dword v2, v[6:7], s[4:7], 0 addr64 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_uaddo_uadde: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: global_store_dwordx2 v[4:5], v[0:1], off +; GFX9-NEXT: global_store_dword v[6:7], v2, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_uaddo_uadde: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: flat_store_dwordx2 v[4:5], v[0:1] +; GFX8-NEXT: flat_store_dword v[6:7], v2 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_uaddo_uadde: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX10-NEXT: global_store_dwordx2 v[4:5], v[0:1], off +; GFX10-NEXT: global_store_dword v[6:7], v2, off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_uaddo_uadde: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX11-NEXT: global_store_b64 v[4:5], v[0:1], off +; GFX11-NEXT: global_store_b32 v[6:7], v2, off +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_uaddo_uadde: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX12-NEXT: s_wait_alu 0xfffd +; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX12-NEXT: s_wait_alu 0xfffd +; GFX12-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off +; GFX12-NEXT: global_store_b32 v[6:7], v2, off +; GFX12-NEXT: s_setpc_b64 s[30:31] + %uaddo = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %add = extractvalue {i64, i1} %uaddo, 0 + %of = extractvalue {i64, i1} %uaddo, 1 + %of32 = select i1 %of, i32 1, i32 0 + store i64 %add, ptr addrspace(1) %res + store i32 %of32, ptr addrspace(1) %carry + ret void +} + +declare {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll new file mode 100644 index 000000000000..1a7ccf083568 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX7 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s + +define amdgpu_kernel void @fcmp_uniform_select(float %a, i32 %b, i32 %c, ptr addrspace(1) %out) { +; GFX7-LABEL: fcmp_uniform_select: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x9 +; GFX7-NEXT: s_load_dword s3, s[4:5], 0xb +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: v_cmp_eq_f32_e64 s[4:5], s6, 0 +; GFX7-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5] +; GFX7-NEXT: s_cselect_b32 s4, 1, 0 +; GFX7-NEXT: s_and_b32 s4, s4, 1 +; GFX7-NEXT: s_cmp_lg_u32 s4, 0 +; GFX7-NEXT: s_cselect_b32 s3, s7, s3 +; GFX7-NEXT: v_mov_b32_e32 v0, s3 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX7-NEXT: s_endpgm +; +; GFX8-LABEL: fcmp_uniform_select: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8-NEXT: s_load_dword s6, s[4:5], 0x2c +; GFX8-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x34 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: v_cmp_eq_f32_e64 s[4:5], s0, 0 +; GFX8-NEXT: s_cmp_lg_u64 s[4:5], 0 +; GFX8-NEXT: s_cselect_b32 s0, 1, 0 +; GFX8-NEXT: s_and_b32 s0, s0, 1 +; GFX8-NEXT: s_cmp_lg_u32 s0, 0 +; GFX8-NEXT: s_cselect_b32 s0, s1, s6 +; GFX8-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: flat_store_dword v[0:1], v2 +; GFX8-NEXT: s_endpgm +; +; GFX11-LABEL: fcmp_uniform_select: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_clause 0x2 +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x2c +; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x34 +; GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_f32_e64 s0, s0, 0 +; GFX11-NEXT: s_cmp_lg_u32 s0, 0 +; GFX11-NEXT: s_cselect_b32 s0, 1, 0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_and_b32 s0, s0, 1 +; GFX11-NEXT: s_cmp_lg_u32 s0, 0 +; GFX11-NEXT: s_cselect_b32 s0, s1, s6 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: global_store_b32 v1, v0, s[2:3] +; GFX11-NEXT: s_endpgm + %cmp = fcmp oeq float %a, 0.0 + %sel = select i1 %cmp, i32 %b, i32 %c + store i32 %sel, ptr addrspace(1) %out + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir new file mode 100644 index 000000000000..67cc0169af61 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir @@ -0,0 +1,37 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=gfx700 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX7 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GF8 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX11 %s + +--- +name: test_copy_scc_vcc +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + ; GFX7-LABEL: name: test_copy_scc_vcc + ; GFX7: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GFX7-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[DEF]], [[DEF]], implicit-def $scc + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc + ; GFX7-NEXT: $sgpr0 = COPY [[COPY]] + ; GFX7-NEXT: S_ENDPGM 0, implicit $sgpr0 + ; + ; GF8-LABEL: name: test_copy_scc_vcc + ; GF8: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GF8-NEXT: S_CMP_LG_U64 [[DEF]], 0, implicit-def $scc + ; GF8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc + ; GF8-NEXT: $sgpr0 = COPY [[COPY]] + ; GF8-NEXT: S_ENDPGM 0, implicit $sgpr0 + ; + ; GFX11-LABEL: name: test_copy_scc_vcc + ; GFX11: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF + ; GFX11-NEXT: S_CMP_LG_U32 [[DEF]], 0, implicit-def $scc + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc + ; GFX11-NEXT: $sgpr0 = COPY [[COPY]] + ; GFX11-NEXT: S_ENDPGM 0, implicit $sgpr0 + %0:vcc(s1) = G_IMPLICIT_DEF + %1:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %0 + $sgpr0 = COPY %1 + S_ENDPGM 0, implicit $sgpr0 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir new file mode 100644 index 000000000000..097372a95746 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir @@ -0,0 +1,524 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s +--- +name: add_s16_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: add_s16_ss + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ADD]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC2]], [[TRUNC2]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_ADD %2, %3 + %5:_(s16) = G_AND %4, %4 +... + +--- +name: add_s16_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; CHECK-LABEL: name: add_s16_sv + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_ADD %2, %3 + %5:_(s16) = G_AND %4, %4 +... + +--- +name: add_s16_vs +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; CHECK-LABEL: name: add_s16_vs + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[COPY2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_ADD %2, %3 + %5:_(s16) = G_AND %4, %4 +... + +--- +name: add_s16_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: add_s16_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_ADD %2, %3 + %5:_(s16) = G_AND %4, %4 +... + +--- +name: add_s32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: add_s32_ss + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ADD]], [[ADD]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = G_ADD %0, %1 + %3:_(s32) = G_AND %2, %2 +... + +--- +name: add_s32_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; CHECK-LABEL: name: add_s32_sv + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ADD]], [[ADD]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = G_ADD %0, %1 + %3:_(s32) = G_AND %2, %2 +... + +--- +name: add_s32_vs +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; CHECK-LABEL: name: add_s32_vs + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ADD]], [[ADD]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = G_ADD %0, %1 + %3:_(s32) = G_AND %2, %2 +... + +--- +name: add_s32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: add_s32_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ADD]], [[ADD]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_ADD %0, %1 + %3:_(s32) = G_AND %2, %2 +... + +--- +name: add_s64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-LABEL: name: add_s64_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s64) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s64) = G_AND [[ADD]], [[ADD]] + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ADD %0, %1 + %3:_(s64) = G_CONSTANT i64 255 + %4:_(s64) = G_AND %2, %2 +... + +--- +name: add_s64_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; CHECK-LABEL: name: add_s64_sv + ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s64) = G_ADD [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $vgpr0_vgpr1 + %2:_(s64) = G_ADD %0, %1 + %3:_(s64) = G_AND %2, %2 +... + +--- +name: add_s64_vs +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; CHECK-LABEL: name: add_s64_vs + ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s64) = G_ADD [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $sgpr0_sgpr1 + %2:_(s64) = G_ADD %0, %1 + %3:_(s64) = G_AND %2, %2 +... + +--- +name: add_s64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: add_s64_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s64) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_ADD %0, %1 + %3:_(s64) = G_AND %2, %2 +... + +--- +name: uaddo_s32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: uaddo_s32_ss + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[UADDO1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[SELECT]], [[UADDO]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32), %3:_(s1) = G_UADDO %0, %1 + %4:_(s32) = G_ZEXT %3 + %5:_(s32) = G_AND %4, %2 +... + +--- +name: uaddo_s32_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr1 + ; CHECK-LABEL: name: uaddo_s32_sv + ; CHECK: liveins: $sgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDO1]](s1), [[C]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDO]], [[SELECT]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32), %3:_(s1) = G_UADDO %0, %1 + %4:_(s32) = G_ZEXT %3 + %5:_(s32) = G_AND %2, %4 +... + +--- +name: uaddo_s32_vs +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $sgpr1 + ; CHECK-LABEL: name: uaddo_s32_vs + ; CHECK: liveins: $vgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDO1]](s1), [[C]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDO]], [[SELECT]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32), %3:_(s1) = G_UADDO %0, %1 + %4:_(s32) = G_ZEXT %3 + %5:_(s32) = G_AND %2, %4 +... + +--- +name: uaddo_s32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: uaddo_s32_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDO1]](s1), [[C]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDO]], [[SELECT]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32), %3:_(s1) = G_UADDO %0, %1 + %4:_(s32) = G_ZEXT %3 + %5:_(s32) = G_AND %2, %4 +... + +--- +name: uadde_s32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2 + ; CHECK-LABEL: name: uadde_s32_ss + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[C]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[COPY]], [[COPY1]], [[AND]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND1]](s32), [[C]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE]], [[SELECT]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s1) = G_TRUNC %2 + %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3 + %6:_(s32) = G_ZEXT %5 + %7:_(s32) = G_AND %4, %6 +... + +--- +name: uadde_s32_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr1, $sgpr2 + ; CHECK-LABEL: name: uadde_s32_sv + ; CHECK: liveins: $sgpr0, $vgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32) + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY3]], [[COPY1]], [[AMDGPU_COPY_VCC_SCC]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDE1]](s1), [[C]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDE]], [[SELECT]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s1) = G_TRUNC %2 + %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3 + %6:_(s32) = G_ZEXT %5 + %7:_(s32) = G_AND %4, %6 +... + +--- +name: uadde_s32_vs +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $sgpr1, $sgpr2 + ; CHECK-LABEL: name: uadde_s32_vs + ; CHECK: liveins: $vgpr0, $sgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32) + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY3]], [[AMDGPU_COPY_VCC_SCC]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDE1]](s1), [[C]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDE]], [[SELECT]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s1) = G_TRUNC %2 + %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3 + %6:_(s32) = G_ZEXT %5 + %7:_(s32) = G_AND %4, %6 +... + +--- +name: uadde_s32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-LABEL: name: uadde_s32_vv + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY2]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY1]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDE1]](s1), [[C]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UADDE]], [[SELECT]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s1) = G_TRUNC %2 + %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3 + %6:_(s32) = G_ZEXT %5 + %7:_(s32) = G_AND %4, %6 +... + +--- +name: uadde_s32_ss_scc_use +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2 + ; CHECK-LABEL: name: uadde_s32_ss_scc_use + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[C]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[COPY]], [[COPY1]], [[AND]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND1]](s32), [[C]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE]], [[SELECT]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s1) = G_TRUNC %2 + %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3 + %6:_(s32) = G_ZEXT %5 + %8:_(s32) = G_AND %4, %6 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir index 54ee69fcb220..30c958fcb192 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s --- name: add_s16_ss legalized: true @@ -19,13 +18,13 @@ body: | ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16) ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[ANYEXT]], [[ANYEXT1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ADD]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC2]](s16) + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC2]], [[TRUNC2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s16) = G_TRUNC %0 %3:_(s16) = G_TRUNC %1 %4:_(s16) = G_ADD %2, %3 - S_ENDPGM 0, implicit %4 + %5:_(s16) = G_AND %4, %4 ... --- @@ -44,13 +43,13 @@ body: | ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[COPY2]], [[TRUNC1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s16) = G_TRUNC %0 %3:_(s16) = G_TRUNC %1 %4:_(s16) = G_ADD %2, %3 - S_ENDPGM 0, implicit %4 + %5:_(s16) = G_AND %4, %4 ... --- @@ -69,13 +68,13 @@ body: | ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[COPY2]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s16) = G_TRUNC %0 %3:_(s16) = G_TRUNC %1 %4:_(s16) = G_ADD %2, %3 - S_ENDPGM 0, implicit %4 + %5:_(s16) = G_AND %4, %4 ... --- @@ -93,11 +92,11 @@ body: | ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 %3:_(s16) = G_TRUNC %1 %4:_(s16) = G_ADD %2, %3 - S_ENDPGM 0, implicit %4 + %5:_(s16) = G_AND %4, %4 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir index 97018fac13a8..01eb39111b0a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s --- name: add_v2s16_ss @@ -18,16 +17,19 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[BITCAST]], [[BITCAST1]] ; CHECK-NEXT: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[LSHR]], [[LSHR1]] ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ADD]](s32), [[ADD1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR]] %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $sgpr1 %2:_(<2 x s16>) = G_ADD %0, %1 - S_ENDPGM 0, implicit %2 + %3:_(s16) = G_CONSTANT i16 255 + %4:_(<2 x s16>) = G_BUILD_VECTOR %3, %3 + %5:_(<2 x s16>) = G_AND %2, %4 ... --- @@ -44,11 +46,11 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY2]], [[COPY1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](<2 x s16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ADD]], [[ADD]] %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 %2:_(<2 x s16>) = G_ADD %0, %1 - S_ENDPGM 0, implicit %2 + %3:_(<2 x s16>) = G_AND %2, %2 ... --- @@ -65,9 +67,11 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ADD]], [[ADD]] %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 %2:_(<2 x s16>) = G_ADD %0, %1 + %3:_(<2 x s16>) = G_AND %2, %2 ... --- @@ -83,9 +87,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](<2 x s16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ADD]], [[ADD]] %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_ADD %0, %1 - S_ENDPGM 0, implicit %2 + %3:_(<2 x s16>) = G_AND %2, %2 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir index 7378c9366ec3..e0e783e7a62f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir @@ -77,10 +77,14 @@ body: | ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C3]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 %3:_(s16) = G_SEXT %2 + %4:_(s16) = G_CONSTANT i16 255 + %5:_(s16) = G_AND %3, %4 ... --- @@ -215,9 +219,13 @@ body: | ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C3]] %0:_(s32) = COPY $sgpr0 %1:_(s1) = G_TRUNC %0 %2:_(s16) = G_SEXT %1 + %3:_(s16) = G_CONSTANT i16 255 + %4:_(s16) = G_AND %2, %3 ... --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir index b0199d3ad5cd..e3c01c0e7fcb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir @@ -1,5 +1,107 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s + +--- +name: sub_s16_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: sub_s16_ss + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SUB]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC2]], [[TRUNC2]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_SUB %2, %3 + %6:_(s16) = G_AND %4, %4 +... + +--- +name: sub_s16_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; CHECK-LABEL: name: sub_s16_sv + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s16) = G_SUB [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[SUB]], [[SUB]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_SUB %2, %3 + %6:_(s16) = G_AND %4, %4 +... + +--- +name: sub_s16_vs +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; CHECK-LABEL: name: sub_s16_vs + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s16) = G_SUB [[TRUNC]], [[COPY2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[SUB]], [[SUB]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_SUB %2, %3 + %6:_(s16) = G_AND %4, %4 +... + +--- +name: sub_s16_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: sub_s16_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s16) = G_SUB [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[SUB]], [[SUB]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_SUB %2, %3 + %6:_(s16) = G_AND %4, %4 +... --- name: sub_s32_ss @@ -14,9 +116,11 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[SUB]], [[SUB]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_SUB %0, %1 + %4:_(s32) = G_AND %2, %2 ... --- @@ -33,9 +137,11 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[SUB]], [[SUB]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_SUB %0, %1 + %4:_(s32) = G_AND %2, %2 ... --- @@ -52,9 +158,11 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[SUB]], [[SUB]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_SUB %0, %1 + %4:_(s32) = G_AND %2, %2 ... --- @@ -70,7 +178,376 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[SUB]], [[SUB]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_SUB %0, %1 + %4:_(s32) = G_AND %2, %2 +... + +--- +name: sub_v2s16_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: sub_v2s16_ss + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:sgpr(s32) = G_SUB [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SUB]](s32), [[SUB1]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC]] + %0:_(<2 x s16>) = COPY $sgpr0 + %1:_(<2 x s16>) = COPY $sgpr1 + %2:_(<2 x s16>) = G_SUB %0, %1 + %5:_(<2 x s16>) = G_AND %2, %2 +... + +--- +name: sub_v2s16_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; CHECK-LABEL: name: sub_v2s16_sv + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(<2 x s16>) = G_SUB [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[SUB]], [[SUB]] + %0:_(<2 x s16>) = COPY $sgpr0 + %1:_(<2 x s16>) = COPY $vgpr0 + %2:_(<2 x s16>) = G_SUB %0, %1 + %5:_(<2 x s16>) = G_AND %2, %2 +... + +--- +name: sub_v2s16_vs +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; CHECK-LABEL: name: sub_v2s16_vs + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(<2 x s16>) = G_SUB [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[SUB]], [[SUB]] + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $sgpr0 + %2:_(<2 x s16>) = G_SUB %0, %1 + %5:_(<2 x s16>) = G_AND %2, %2 +... + +--- +name: sub_v2s16_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: sub_v2s16_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(<2 x s16>) = G_SUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[SUB]], [[SUB]] + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<2 x s16>) = G_SUB %0, %1 + %5:_(<2 x s16>) = G_AND %2, %2 +... + +--- +name: sub_s64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr0_sgpr1 + ; CHECK-LABEL: name: sub_s64_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr0_sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s64) = G_SUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s64) = G_AND [[SUB]], [[SUB]] + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr0_sgpr1 + %2:_(s64) = G_SUB %0, %1 + %4:_(s64) = G_AND %2, %2 +... + +--- +name: sub_s64_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; CHECK-LABEL: name: sub_s64_sv + ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s64) = G_SUB [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $vgpr0_vgpr1 + %2:_(s64) = G_SUB %0, %1 + %4:_(s64) = G_AND %2, %2 +... + +--- +name: sub_s64_vs +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; CHECK-LABEL: name: sub_s64_vs + ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s64) = G_SUB [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $sgpr0_sgpr1 + %2:_(s64) = G_SUB %0, %1 + %4:_(s64) = G_AND %2, %2 +... + +--- +name: sub_s64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: sub_s64_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s64) = G_SUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_SUB %0, %1 + %4:_(s64) = G_AND %2, %2 +... + +--- +name: usubo_s32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: usubo_s32_ss + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:sgpr(s32), [[USUBO1:%[0-9]+]]:sgpr(s32) = G_USUBO [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[USUBO]], [[USUBO]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32), %3:_(s1) = G_USUBO %0, %1 + %5:_(s32) = G_AND %2, %2 +... + +--- +name: usubo_s32_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr1 + ; CHECK-LABEL: name: usubo_s32_sv + ; CHECK: liveins: $sgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBO]], [[USUBO]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32), %3:_(s1) = G_USUBO %0, %1 + %5:_(s32) = G_AND %2, %2 +... + +--- +name: usubo_s32_vs +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $sgpr1 + ; CHECK-LABEL: name: usubo_s32_vs + ; CHECK: liveins: $vgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBO]], [[USUBO]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32), %3:_(s1) = G_USUBO %0, %1 + %5:_(s32) = G_AND %2, %2 +... + +--- +name: usubo_s32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: usubo_s32_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBO]], [[USUBO]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32), %3:_(s1) = G_USUBO %0, %1 + %5:_(s32) = G_AND %2, %2 +... + +--- +name: usube_s32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2 + ; CHECK-LABEL: name: usube_s32_ss + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[C]] + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:sgpr(s32), [[USUBE1:%[0-9]+]]:sgpr(s32) = G_USUBE [[COPY]], [[COPY1]], [[AND]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[USUBE]], [[USUBE]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s1) = G_TRUNC %2 + %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3 + %7:_(s32) = G_AND %4, %4 +... + +--- +name: usube_s32_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr1, $sgpr2 + ; CHECK-LABEL: name: usube_s32_sv + ; CHECK: liveins: $sgpr0, $vgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32) + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY3]], [[COPY1]], [[AMDGPU_COPY_VCC_SCC]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBE]], [[USUBE]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s1) = G_TRUNC %2 + %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3 + %7:_(s32) = G_AND %4, %4 +... + +--- +name: usube_s32_vs +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $sgpr1, $sgpr2 + ; CHECK-LABEL: name: usube_s32_vs + ; CHECK: liveins: $vgpr0, $sgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32) + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY3]], [[AMDGPU_COPY_VCC_SCC]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBE]], [[USUBE]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s1) = G_TRUNC %2 + %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3 + %7:_(s32) = G_AND %4, %4 +... + +--- +name: usube_s32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-LABEL: name: usube_s32_vv + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY2]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]] + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY1]], [[ICMP]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[USUBE]], [[USUBE]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s1) = G_TRUNC %2 + %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3 + %7:_(s32) = G_AND %4, %4 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir index 088c20a3137f..d4baa5fb864f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir @@ -73,10 +73,14 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C]], [[C1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 %3:_(s16) = G_ZEXT %2 + %4:_(s16) = G_CONSTANT i16 255 + %5:_(s16) = G_AND %3, %4 ... --- @@ -209,9 +213,13 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C]], [[C1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C2]] %0:_(s32) = COPY $sgpr0 %1:_(s1) = G_TRUNC %0 %2:_(s16) = G_ZEXT %1 + %3:_(s16) = G_CONSTANT i16 255 + %4:_(s16) = G_AND %2, %3 ... --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll new file mode 100644 index 000000000000..8b5958daac16 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll @@ -0,0 +1,535 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX12 %s + +define i16 @s_sub_i16(i16 inreg %a, i16 inreg %b) { +; GFX7-LABEL: s_sub_i16: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_sub_i32 s4, s16, s17 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_sub_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_sub_i32 s4, s16, s17 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_sub_i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_sub_i32 s4, s16, s17 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_sub_i16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_sub_i32 s4, s16, s17 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_sub_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_sub_i32 s0, s0, s1 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_sub_i16: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_sub_co_i32 s0, s0, s1 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = sub i16 %a, %b + ret i16 %c +} + +define i16 @v_sub_i16(i16 %a, i16 %b) { +; GFX7-LABEL: v_sub_i16: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_sub_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_u16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_sub_i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_sub_i16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_sub_nc_u16 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_sub_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_sub_nc_u16 v0.l, v0.l, v1.l +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_sub_i16: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_sub_nc_u16 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = sub i16 %a, %b + ret i16 %c +} + +define i32 @s_sub_i32(i32 inreg %a, i32 inreg %b) { +; GFX7-LABEL: s_sub_i32: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_sub_i32 s4, s16, s17 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_sub_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_sub_i32 s4, s16, s17 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_sub_i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_sub_i32 s4, s16, s17 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_sub_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_sub_i32 s4, s16, s17 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_sub_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_sub_i32 s0, s0, s1 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_sub_i32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_sub_co_i32 s0, s0, s1 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = sub i32 %a, %b + ret i32 %c +} + +define i32 @v_sub_i32(i32 %a, i32 %b) { +; GFX7-LABEL: v_sub_i32: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_sub_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_u32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_sub_i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_sub_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_sub_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_sub_i32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_sub_nc_u32_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = sub i32 %a, %b + ret i32 %c +} + +; TODO: Add test for s_sub_v2i16. Instruction selector currently fails +; to handle G_UNMERGE_VALUES. + +define <2 x i16> @v_sub_v2i16(<2 x i16> %a, <2 x i16> %b) { +; GFX7-LABEL: v_sub_v2i16: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_sub_v2i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_sub_i16 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_sub_v2i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_sub_u16_e32 v2, v0, v1 +; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_sub_v2i16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_pk_sub_i16 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_sub_v2i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_pk_sub_i16 v0, v0, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_sub_v2i16: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_sub_i16 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = sub <2 x i16> %a, %b + ret <2 x i16> %c +} + +define i64 @s_sub_i64(i64 inreg %a, i64 inreg %b) { +; GFX7-LABEL: s_sub_i64: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_sub_u32 s4, s16, s18 +; GFX7-NEXT: s_subb_u32 s5, s17, s19 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_sub_i64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_sub_u32 s4, s16, s18 +; GFX9-NEXT: s_subb_u32 s5, s17, s19 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_sub_i64: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_sub_u32 s4, s16, s18 +; GFX8-NEXT: s_subb_u32 s5, s17, s19 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-NEXT: v_mov_b32_e32 v1, s5 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_sub_i64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_sub_u32 s4, s16, s18 +; GFX10-NEXT: s_subb_u32 s5, s17, s19 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: v_mov_b32_e32 v1, s5 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_sub_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_sub_u32 s0, s0, s2 +; GFX11-NEXT: s_subb_u32 s1, s1, s3 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_sub_i64: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[0:1], s[2:3] +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = sub i64 %a, %b + ret i64 %c +} + +define i64 @v_sub_i64(i64 %a, i64 %b) { +; GFX7-LABEL: v_sub_i64: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_sub_i64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_sub_i64: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_sub_i64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2 +; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_sub_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2 +; GFX11-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_sub_i64: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2 +; GFX12-NEXT: s_wait_alu 0xfffd +; GFX12-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = sub i64 %a, %b + ret i64 %c +} + +define void @s_usubo_usube(i64 inreg %a, i64 inreg %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) { +; GFX7-LABEL: s_usubo_usube: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_sub_u32 s4, s16, s18 +; GFX7-NEXT: s_subb_u32 s5, s17, s19 +; GFX7-NEXT: v_mov_b32_e32 v4, s4 +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_cselect_b32 s8, 1, 0 +; GFX7-NEXT: v_mov_b32_e32 v5, s5 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: v_mov_b32_e32 v0, s8 +; GFX7-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_usubo_usube: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_sub_u32 s4, s16, s18 +; GFX9-NEXT: s_subb_u32 s5, s17, s19 +; GFX9-NEXT: v_mov_b32_e32 v4, s4 +; GFX9-NEXT: s_cselect_b32 s6, 1, 0 +; GFX9-NEXT: v_mov_b32_e32 v5, s5 +; GFX9-NEXT: global_store_dwordx2 v[0:1], v[4:5], off +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: global_store_dword v[2:3], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_usubo_usube: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_sub_u32 s4, s16, s18 +; GFX8-NEXT: s_subb_u32 s5, s17, s19 +; GFX8-NEXT: v_mov_b32_e32 v4, s4 +; GFX8-NEXT: s_cselect_b32 s6, 1, 0 +; GFX8-NEXT: v_mov_b32_e32 v5, s5 +; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[4:5] +; GFX8-NEXT: v_mov_b32_e32 v0, s6 +; GFX8-NEXT: flat_store_dword v[2:3], v0 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_usubo_usube: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_sub_u32 s4, s16, s18 +; GFX10-NEXT: s_subb_u32 s5, s17, s19 +; GFX10-NEXT: s_cselect_b32 s6, 1, 0 +; GFX10-NEXT: v_mov_b32_e32 v4, s4 +; GFX10-NEXT: v_mov_b32_e32 v5, s5 +; GFX10-NEXT: v_mov_b32_e32 v6, s6 +; GFX10-NEXT: global_store_dwordx2 v[0:1], v[4:5], off +; GFX10-NEXT: global_store_dword v[2:3], v6, off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_usubo_usube: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_sub_u32 s0, s0, s2 +; GFX11-NEXT: s_subb_u32 s1, s1, s3 +; GFX11-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11-NEXT: v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0 +; GFX11-NEXT: v_mov_b32_e32 v6, s2 +; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off +; GFX11-NEXT: global_store_b32 v[2:3], v6, off +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_usubo_usube: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_sub_co_u32 s0, s0, s2 +; GFX12-NEXT: s_sub_co_ci_u32 s1, s1, s3 +; GFX12-NEXT: s_cselect_b32 s2, 1, 0 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0 +; GFX12-NEXT: v_mov_b32_e32 v6, s2 +; GFX12-NEXT: global_store_b64 v[0:1], v[4:5], off +; GFX12-NEXT: global_store_b32 v[2:3], v6, off +; GFX12-NEXT: s_setpc_b64 s[30:31] + %usubo = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %sub = extractvalue {i64, i1} %usubo, 0 + %of = extractvalue {i64, i1} %usubo, 1 + %of32 = select i1 %of, i32 1, i32 0 + store i64 %sub, ptr addrspace(1) %res + store i32 %of32, ptr addrspace(1) %carry + ret void +} + +define void @v_usubo_usube(i64 %a, i64 %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) { +; GFX7-LABEL: v_usubo_usube: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX7-NEXT: buffer_store_dwordx2 v[0:1], v[4:5], s[4:7], 0 addr64 +; GFX7-NEXT: buffer_store_dword v2, v[6:7], s[4:7], 0 addr64 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_usubo_usube: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: global_store_dwordx2 v[4:5], v[0:1], off +; GFX9-NEXT: global_store_dword v[6:7], v2, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_usubo_usube: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: flat_store_dwordx2 v[4:5], v[0:1] +; GFX8-NEXT: flat_store_dword v[6:7], v2 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_usubo_usube: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2 +; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX10-NEXT: global_store_dwordx2 v[4:5], v[0:1], off +; GFX10-NEXT: global_store_dword v[6:7], v2, off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_usubo_usube: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2 +; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX11-NEXT: global_store_b64 v[4:5], v[0:1], off +; GFX11-NEXT: global_store_b32 v[6:7], v2, off +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_usubo_usube: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2 +; GFX12-NEXT: s_wait_alu 0xfffd +; GFX12-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX12-NEXT: s_wait_alu 0xfffd +; GFX12-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off +; GFX12-NEXT: global_store_b32 v[6:7], v2, off +; GFX12-NEXT: s_setpc_b64 s[30:31] + %usubo = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %sub = extractvalue {i64, i1} %usubo, 0 + %of = extractvalue {i64, i1} %usubo, 1 + %of32 = select i1 %of, i32 1, i32 0 + store i64 %sub, ptr addrspace(1) %res + store i32 %of32, ptr addrspace(1) %carry + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll index aa591d28eb34..c1f3a12dba57 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll @@ -591,3 +591,24 @@ exit: store i32 %ballot, ptr addrspace(1) %out ret void } + +define amdgpu_cs i32 @compare_bfloats(bfloat %x, bfloat %y) { +; GFX10-LABEL: compare_bfloats: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX10-NEXT: v_cmp_gt_f32_e64 s0, v0, v1 +; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: compare_bfloats: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_mov_b16_e32 v2.l, 0 +; GFX11-NEXT: v_mov_b16_e32 v2.h, v1.l +; GFX11-NEXT: v_mov_b16_e32 v1.h, v0.l +; GFX11-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX11-NEXT: v_cmp_gt_f32_e64 s0, v1, v2 +; GFX11-NEXT: ; return to shader part epilog + %cmp = fcmp ogt bfloat %x, %y + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp) + ret i32 %ballot +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll index 30c2c260a327..827a01ff33d0 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll @@ -557,3 +557,15 @@ exit: store i64 %ballot, ptr addrspace(1) %out ret void } + +define amdgpu_cs i64 @compare_bfloats(bfloat %x, bfloat %y) { +; CHECK-LABEL: compare_bfloats: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; CHECK-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; CHECK-NEXT: v_cmp_gt_f32_e64 s[0:1], v0, v1 +; CHECK-NEXT: ; return to shader part epilog + %cmp = fcmp ogt bfloat %x, %y + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp) + ret i64 %ballot +} diff --git a/llvm/test/CodeGen/ARM/strict-fp-func.ll b/llvm/test/CodeGen/ARM/strict-fp-func.ll new file mode 100644 index 000000000000..39bb2b46bdac --- /dev/null +++ b/llvm/test/CodeGen/ARM/strict-fp-func.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple arm-none-eabi -stop-after=finalize-isel %s -o - | FileCheck %s + +define float @func_02(float %x, float %y) strictfp nounwind { + %call = call float @func_01(float %x) strictfp + %res = call float @llvm.experimental.constrained.fadd.f32(float %call, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") strictfp + ret float %res +} +; CHECK-LABEL: name: func_02 +; CHECK: BL @func_01, {{.*}}, implicit-def $fpscr_rm + + +declare float @func_01(float) +declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll index 93fcd421e4bd..e02a2e7cce9b 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll @@ -12,8 +12,8 @@ define float @flog2_s(float %x) nounwind { ; ; LA64-LABEL: flog2_s: ; LA64: # %bb.0: -; LA64-NEXT: pcaddu18i $t8, %call36(log2f) -; LA64-NEXT: jr $t8 +; LA64-NEXT: flogb.s $fa0, $fa0 +; LA64-NEXT: ret %y = call float @llvm.log2.f32(float %x) ret float %y } @@ -25,8 +25,8 @@ define double @flog2_d(double %x) nounwind { ; ; LA64-LABEL: flog2_d: ; LA64: # %bb.0: -; LA64-NEXT: pcaddu18i $t8, %call36(log2) -; LA64-NEXT: jr $t8 +; LA64-NEXT: flogb.d $fa0, $fa0 +; LA64-NEXT: ret %y = call double @llvm.log2.f64(double %x) ret double %y } diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll index 2a5a8fa05d64..5c5c19935080 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll @@ -1,14 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 define void @xvavg_b(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK-LABEL: xvavg_b: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1 +; CHECK-NEXT: xvavg.b $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -25,8 +24,7 @@ define void @xvavg_h(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 -; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1 +; CHECK-NEXT: xvavg.h $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -43,8 +41,7 @@ define void @xvavg_w(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 -; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1 +; CHECK-NEXT: xvavg.w $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -57,14 +54,22 @@ entry: } define void @xvavg_d(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: xvavg_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 -; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: xvavg_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvld $xr0, $a1, 0 +; LA32-NEXT: xvld $xr1, $a2, 0 +; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1 +; LA32-NEXT: xvsrai.d $xr0, $xr0, 1 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: xvavg_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvld $xr1, $a2, 0 +; LA64-NEXT: xvavg.d $xr0, $xr0, $xr1 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %va = load <4 x i64>, ptr %a %vb = load <4 x i64>, ptr %b @@ -79,8 +84,7 @@ define void @xvavg_bu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1 +; CHECK-NEXT: xvavg.bu $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -97,8 +101,7 @@ define void @xvavg_hu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 -; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1 +; CHECK-NEXT: xvavg.hu $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -115,8 +118,7 @@ define void @xvavg_wu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 -; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1 +; CHECK-NEXT: xvavg.wu $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -129,14 +131,22 @@ entry: } define void @xvavg_du(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: xvavg_du: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 -; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: xvavg_du: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvld $xr0, $a1, 0 +; LA32-NEXT: xvld $xr1, $a2, 0 +; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1 +; LA32-NEXT: xvsrli.d $xr0, $xr0, 1 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: xvavg_du: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvld $xr1, $a2, 0 +; LA64-NEXT: xvavg.du $xr0, $xr0, $xr1 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %va = load <4 x i64>, ptr %a %vb = load <4 x i64>, ptr %b @@ -151,9 +161,7 @@ define void @xvavgr_b(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 1 -; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1 +; CHECK-NEXT: xvavgr.b $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -171,9 +179,7 @@ define void @xvavgr_h(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 -; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 1 -; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1 +; CHECK-NEXT: xvavgr.h $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -191,9 +197,7 @@ define void @xvavgr_w(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 -; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 1 -; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1 +; CHECK-NEXT: xvavgr.w $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -207,15 +211,23 @@ entry: } define void @xvavgr_d(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: xvavgr_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 -; CHECK-NEXT: xvaddi.du $xr0, $xr0, 1 -; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: xvavgr_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvld $xr0, $a1, 0 +; LA32-NEXT: xvld $xr1, $a2, 0 +; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1 +; LA32-NEXT: xvaddi.du $xr0, $xr0, 1 +; LA32-NEXT: xvsrai.d $xr0, $xr0, 1 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: xvavgr_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvld $xr1, $a2, 0 +; LA64-NEXT: xvavgr.d $xr0, $xr0, $xr1 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %va = load <4 x i64>, ptr %a %vb = load <4 x i64>, ptr %b @@ -231,9 +243,7 @@ define void @xvavgr_bu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 1 -; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1 +; CHECK-NEXT: xvavgr.bu $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -251,9 +261,7 @@ define void @xvavgr_hu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 -; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 1 -; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1 +; CHECK-NEXT: xvavgr.hu $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -271,9 +279,7 @@ define void @xvavgr_wu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 -; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 1 -; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1 +; CHECK-NEXT: xvavgr.wu $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -287,15 +293,23 @@ entry: } define void @xvavgr_du(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: xvavgr_du: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 -; CHECK-NEXT: xvaddi.du $xr0, $xr0, 1 -; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: xvavgr_du: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvld $xr0, $a1, 0 +; LA32-NEXT: xvld $xr1, $a2, 0 +; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1 +; LA32-NEXT: xvaddi.du $xr0, $xr0, 1 +; LA32-NEXT: xvsrli.d $xr0, $xr0, 1 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: xvavgr_du: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvld $xr1, $a2, 0 +; LA64-NEXT: xvavgr.du $xr0, $xr0, $xr1 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %va = load <4 x i64>, ptr %a %vb = load <4 x i64>, ptr %b diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll index 68f2e3ab488e..6b5f5751e570 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll @@ -1,166 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA64 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x float> @llvm.log2.v8f32(<8 x float>) declare <4 x double> @llvm.log2.v4f64(<4 x double>) define void @flog2_v8f32(ptr %res, ptr %a) nounwind { -; LA32-LABEL: flog2_v8f32: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -128 -; LA32-NEXT: st.w $ra, $sp, 124 # 4-byte Folded Spill -; LA32-NEXT: st.w $fp, $sp, 120 # 4-byte Folded Spill -; LA32-NEXT: xvld $xr0, $a1, 0 -; LA32-NEXT: xvst $xr0, $sp, 80 # 32-byte Folded Spill -; LA32-NEXT: move $fp, $a0 -; LA32-NEXT: xvpickve.w $xr0, $xr0, 5 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT: vst $vr0, $sp, 48 # 16-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.w $xr0, $xr0, 4 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $xr0 -; LA32-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload -; LA32-NEXT: vextrins.w $vr0, $vr1, 16 -; LA32-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.w $xr0, $xr0, 6 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload -; LA32-NEXT: vextrins.w $vr1, $vr0, 32 -; LA32-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.w $xr0, $xr0, 7 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload -; LA32-NEXT: vextrins.w $vr1, $vr0, 48 -; LA32-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.w $xr0, $xr0, 1 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.w $xr0, $xr0, 0 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $xr0 -; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA32-NEXT: vextrins.w $vr0, $vr1, 16 -; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.w $xr0, $xr0, 2 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload -; LA32-NEXT: vextrins.w $vr1, $vr0, 32 -; LA32-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.w $xr0, $xr0, 3 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload -; LA32-NEXT: vextrins.w $vr1, $vr0, 48 -; LA32-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload -; LA32-NEXT: xvpermi.q $xr1, $xr0, 2 -; LA32-NEXT: xvst $xr1, $fp, 0 -; LA32-NEXT: ld.w $fp, $sp, 120 # 4-byte Folded Reload -; LA32-NEXT: ld.w $ra, $sp, 124 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 128 -; LA32-NEXT: ret -; -; LA64-LABEL: flog2_v8f32: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -128 -; LA64-NEXT: st.d $ra, $sp, 120 # 8-byte Folded Spill -; LA64-NEXT: st.d $fp, $sp, 112 # 8-byte Folded Spill -; LA64-NEXT: xvld $xr0, $a1, 0 -; LA64-NEXT: xvst $xr0, $sp, 80 # 32-byte Folded Spill -; LA64-NEXT: move $fp, $a0 -; LA64-NEXT: xvpickve.w $xr0, $xr0, 5 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: vst $vr0, $sp, 48 # 16-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.w $xr0, $xr0, 4 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0 -; LA64-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload -; LA64-NEXT: vextrins.w $vr0, $vr1, 16 -; LA64-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.w $xr0, $xr0, 6 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload -; LA64-NEXT: vextrins.w $vr1, $vr0, 32 -; LA64-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.w $xr0, $xr0, 7 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload -; LA64-NEXT: vextrins.w $vr1, $vr0, 48 -; LA64-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.w $xr0, $xr0, 1 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.w $xr0, $xr0, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0 -; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT: vextrins.w $vr0, $vr1, 16 -; LA64-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.w $xr0, $xr0, 2 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload -; LA64-NEXT: vextrins.w $vr1, $vr0, 32 -; LA64-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.w $xr0, $xr0, 3 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload -; LA64-NEXT: vextrins.w $vr1, $vr0, 48 -; LA64-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload -; LA64-NEXT: xvpermi.q $xr1, $xr0, 2 -; LA64-NEXT: xvst $xr1, $fp, 0 -; LA64-NEXT: ld.d $fp, $sp, 112 # 8-byte Folded Reload -; LA64-NEXT: ld.d $ra, $sp, 120 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 128 -; LA64-NEXT: ret +; CHECK-LABEL: flog2_v8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvflogb.s $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret entry: %v = load <8 x float>, ptr %a %r = call <8 x float> @llvm.log2.v8f32(<8 x float> %v) @@ -169,93 +20,12 @@ entry: } define void @flog2_v4f64(ptr %res, ptr %a) nounwind { -; LA32-LABEL: flog2_v4f64: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -112 -; LA32-NEXT: st.w $ra, $sp, 108 # 4-byte Folded Spill -; LA32-NEXT: st.w $fp, $sp, 104 # 4-byte Folded Spill -; LA32-NEXT: xvld $xr0, $a1, 0 -; LA32-NEXT: xvst $xr0, $sp, 64 # 32-byte Folded Spill -; LA32-NEXT: move $fp, $a0 -; LA32-NEXT: xvpickve.d $xr0, $xr0, 3 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA32-NEXT: bl log2 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA32-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.d $xr0, $xr0, 2 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA32-NEXT: bl log2 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 -; LA32-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload -; LA32-NEXT: vextrins.d $vr0, $vr1, 16 -; LA32-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.d $xr0, $xr0, 1 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA32-NEXT: bl log2 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.d $xr0, $xr0, 0 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA32-NEXT: bl log2 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 -; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA32-NEXT: vextrins.d $vr0, $vr1, 16 -; LA32-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload -; LA32-NEXT: xvpermi.q $xr0, $xr1, 2 -; LA32-NEXT: xvst $xr0, $fp, 0 -; LA32-NEXT: ld.w $fp, $sp, 104 # 4-byte Folded Reload -; LA32-NEXT: ld.w $ra, $sp, 108 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 112 -; LA32-NEXT: ret -; -; LA64-LABEL: flog2_v4f64: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -112 -; LA64-NEXT: st.d $ra, $sp, 104 # 8-byte Folded Spill -; LA64-NEXT: st.d $fp, $sp, 96 # 8-byte Folded Spill -; LA64-NEXT: xvld $xr0, $a1, 0 -; LA64-NEXT: xvst $xr0, $sp, 64 # 32-byte Folded Spill -; LA64-NEXT: move $fp, $a0 -; LA64-NEXT: xvpickve.d $xr0, $xr0, 3 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.d $xr0, $xr0, 2 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 -; LA64-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload -; LA64-NEXT: vextrins.d $vr0, $vr1, 16 -; LA64-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.d $xr0, $xr0, 1 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.d $xr0, $xr0, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 -; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT: vextrins.d $vr0, $vr1, 16 -; LA64-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload -; LA64-NEXT: xvpermi.q $xr0, $xr1, 2 -; LA64-NEXT: xvst $xr0, $fp, 0 -; LA64-NEXT: ld.d $fp, $sp, 96 # 8-byte Folded Reload -; LA64-NEXT: ld.d $ra, $sp, 104 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 112 -; LA64-NEXT: ret +; CHECK-LABEL: flog2_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvflogb.d $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret entry: %v = load <4 x double>, ptr %a %r = call <4 x double> @llvm.log2.v4f64(<4 x double> %v) diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll index 20b8898436cc..334af22edee5 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll @@ -1,14 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 define void @vavg_b(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK-LABEL: vavg_b: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrai.b $vr0, $vr0, 1 +; CHECK-NEXT: vavg.b $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -25,8 +24,7 @@ define void @vavg_h(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrai.h $vr0, $vr0, 1 +; CHECK-NEXT: vavg.h $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -43,8 +41,7 @@ define void @vavg_w(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrai.w $vr0, $vr0, 1 +; CHECK-NEXT: vavg.w $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -57,14 +54,22 @@ entry: } define void @vavg_d(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: vavg_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrai.d $vr0, $vr0, 1 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: vavg_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vld $vr0, $a1, 0 +; LA32-NEXT: vld $vr1, $a2, 0 +; LA32-NEXT: vadd.d $vr0, $vr0, $vr1 +; LA32-NEXT: vsrai.d $vr0, $vr0, 1 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: vavg_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vld $vr0, $a1, 0 +; LA64-NEXT: vld $vr1, $a2, 0 +; LA64-NEXT: vavg.d $vr0, $vr0, $vr1 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret entry: %va = load <2 x i64>, ptr %a %vb = load <2 x i64>, ptr %b @@ -79,8 +84,7 @@ define void @vavg_bu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrli.b $vr0, $vr0, 1 +; CHECK-NEXT: vavg.bu $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -97,8 +101,7 @@ define void @vavg_hu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrli.h $vr0, $vr0, 1 +; CHECK-NEXT: vavg.hu $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -115,8 +118,7 @@ define void @vavg_wu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrli.w $vr0, $vr0, 1 +; CHECK-NEXT: vavg.wu $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -129,14 +131,22 @@ entry: } define void @vavg_du(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: vavg_du: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrli.d $vr0, $vr0, 1 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: vavg_du: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vld $vr0, $a1, 0 +; LA32-NEXT: vld $vr1, $a2, 0 +; LA32-NEXT: vadd.d $vr0, $vr0, $vr1 +; LA32-NEXT: vsrli.d $vr0, $vr0, 1 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: vavg_du: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vld $vr0, $a1, 0 +; LA64-NEXT: vld $vr1, $a2, 0 +; LA64-NEXT: vavg.du $vr0, $vr0, $vr1 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret entry: %va = load <2 x i64>, ptr %a %vb = load <2 x i64>, ptr %b @@ -151,9 +161,7 @@ define void @vavgr_b(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vaddi.bu $vr0, $vr0, 1 -; CHECK-NEXT: vsrai.b $vr0, $vr0, 1 +; CHECK-NEXT: vavgr.b $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -171,9 +179,7 @@ define void @vavgr_h(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 -; CHECK-NEXT: vaddi.hu $vr0, $vr0, 1 -; CHECK-NEXT: vsrai.h $vr0, $vr0, 1 +; CHECK-NEXT: vavgr.h $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -191,9 +197,7 @@ define void @vavgr_w(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 -; CHECK-NEXT: vaddi.wu $vr0, $vr0, 1 -; CHECK-NEXT: vsrai.w $vr0, $vr0, 1 +; CHECK-NEXT: vavgr.w $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -207,15 +211,23 @@ entry: } define void @vavgr_d(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: vavgr_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 -; CHECK-NEXT: vaddi.du $vr0, $vr0, 1 -; CHECK-NEXT: vsrai.d $vr0, $vr0, 1 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: vavgr_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vld $vr0, $a1, 0 +; LA32-NEXT: vld $vr1, $a2, 0 +; LA32-NEXT: vadd.d $vr0, $vr0, $vr1 +; LA32-NEXT: vaddi.du $vr0, $vr0, 1 +; LA32-NEXT: vsrai.d $vr0, $vr0, 1 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: vavgr_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vld $vr0, $a1, 0 +; LA64-NEXT: vld $vr1, $a2, 0 +; LA64-NEXT: vavgr.d $vr0, $vr0, $vr1 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret entry: %va = load <2 x i64>, ptr %a %vb = load <2 x i64>, ptr %b @@ -231,9 +243,7 @@ define void @vavgr_bu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vaddi.bu $vr0, $vr0, 1 -; CHECK-NEXT: vsrli.b $vr0, $vr0, 1 +; CHECK-NEXT: vavgr.bu $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -251,9 +261,7 @@ define void @vavgr_hu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 -; CHECK-NEXT: vaddi.hu $vr0, $vr0, 1 -; CHECK-NEXT: vsrli.h $vr0, $vr0, 1 +; CHECK-NEXT: vavgr.hu $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -271,9 +279,7 @@ define void @vavgr_wu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 -; CHECK-NEXT: vaddi.wu $vr0, $vr0, 1 -; CHECK-NEXT: vsrli.w $vr0, $vr0, 1 +; CHECK-NEXT: vavgr.wu $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -287,15 +293,23 @@ entry: } define void @vavgr_du(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: vavgr_du: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 -; CHECK-NEXT: vaddi.du $vr0, $vr0, 1 -; CHECK-NEXT: vsrli.d $vr0, $vr0, 1 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: vavgr_du: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vld $vr0, $a1, 0 +; LA32-NEXT: vld $vr1, $a2, 0 +; LA32-NEXT: vadd.d $vr0, $vr0, $vr1 +; LA32-NEXT: vaddi.du $vr0, $vr0, 1 +; LA32-NEXT: vsrli.d $vr0, $vr0, 1 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: vavgr_du: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vld $vr0, $a1, 0 +; LA64-NEXT: vld $vr1, $a2, 0 +; LA64-NEXT: vavgr.du $vr0, $vr0, $vr1 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret entry: %va = load <2 x i64>, ptr %a %vb = load <2 x i64>, ptr %b diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll index e5e75ec617b5..87cc7c6dbc70 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll @@ -1,98 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefix=LA64 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.log2.v4f32(<4 x float>) declare <2 x double> @llvm.log2.v2f64(<2 x double>) define void @flog2_v4f32(ptr %res, ptr %a) nounwind { -; LA32-LABEL: flog2_v4f32: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -48 -; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill -; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill -; LA32-NEXT: vld $vr0, $a1, 0 -; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA32-NEXT: move $fp, $a0 -; LA32-NEXT: vreplvei.w $vr0, $vr0, 1 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill -; LA32-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload -; LA32-NEXT: vreplvei.w $vr0, $vr0, 0 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload -; LA32-NEXT: vextrins.w $vr0, $vr1, 16 -; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill -; LA32-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload -; LA32-NEXT: vreplvei.w $vr0, $vr0, 2 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload -; LA32-NEXT: vextrins.w $vr1, $vr0, 32 -; LA32-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill -; LA32-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload -; LA32-NEXT: vreplvei.w $vr0, $vr0, 3 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload -; LA32-NEXT: vextrins.w $vr1, $vr0, 48 -; LA32-NEXT: vst $vr1, $fp, 0 -; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload -; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 48 -; LA32-NEXT: ret -; -; LA64-LABEL: flog2_v4f32: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -48 -; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64-NEXT: vld $vr0, $a1, 0 -; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA64-NEXT: move $fp, $a0 -; LA64-NEXT: vreplvei.w $vr0, $vr0, 1 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill -; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT: vreplvei.w $vr0, $vr0, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload -; LA64-NEXT: vextrins.w $vr0, $vr1, 16 -; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill -; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT: vreplvei.w $vr0, $vr0, 2 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload -; LA64-NEXT: vextrins.w $vr1, $vr0, 32 -; LA64-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill -; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT: vreplvei.w $vr0, $vr0, 3 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload -; LA64-NEXT: vextrins.w $vr1, $vr0, 48 -; LA64-NEXT: vst $vr1, $fp, 0 -; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 48 -; LA64-NEXT: ret +; CHECK-LABEL: flog2_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vflogb.s $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret entry: %v = load <4 x float>, ptr %a %r = call <4 x float> @llvm.log2.v4f32(<4 x float> %v) @@ -101,59 +20,12 @@ entry: } define void @flog2_v2f64(ptr %res, ptr %a) nounwind { -; LA32-LABEL: flog2_v2f64: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -48 -; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill -; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill -; LA32-NEXT: vld $vr0, $a1, 0 -; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill -; LA32-NEXT: move $fp, $a0 -; LA32-NEXT: vreplvei.d $vr0, $vr0, 1 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 -; LA32-NEXT: bl log2 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA32-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload -; LA32-NEXT: vreplvei.d $vr0, $vr0, 0 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 -; LA32-NEXT: bl log2 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA32-NEXT: vextrins.d $vr0, $vr1, 16 -; LA32-NEXT: vst $vr0, $fp, 0 -; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload -; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 48 -; LA32-NEXT: ret -; -; LA64-LABEL: flog2_v2f64: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -48 -; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64-NEXT: vld $vr0, $a1, 0 -; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill -; LA64-NEXT: move $fp, $a0 -; LA64-NEXT: vreplvei.d $vr0, $vr0, 1 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload -; LA64-NEXT: vreplvei.d $vr0, $vr0, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT: vextrins.d $vr0, $vr1, 16 -; LA64-NEXT: vst $vr0, $fp, 0 -; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 48 -; LA64-NEXT: ret +; CHECK-LABEL: flog2_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vflogb.d $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret entry: %v = load <2 x double>, ptr %a %r = call <2 x double> @llvm.log2.v2f64(<2 x double> %v) diff --git a/llvm/test/CodeGen/RISCV/rv64-stackmap.ll b/llvm/test/CodeGen/RISCV/rv64-stackmap.ll index d07f608bf789..c3183a1a3e03 100644 --- a/llvm/test/CodeGen/RISCV/rv64-stackmap.ll +++ b/llvm/test/CodeGen/RISCV/rv64-stackmap.ll @@ -290,9 +290,9 @@ define void @liveConstant() { ; CHECK-NEXT: .half 2 ; CHECK-NEXT: .half 0 ; CHECK-NEXT: .word -define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27) { +define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i8 %l25, i16 zeroext %l26, i32 signext %l27) { entry: - call void (i64, i32, ptr, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 28, ptr null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27) + call void (i64, i32, ptr, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 28, ptr null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i8 %l25, i16 %l26, i32 %l27) ret void } diff --git a/llvm/test/CodeGen/RISCV/rv64p.ll b/llvm/test/CodeGen/RISCV/rv64p.ll index cb07f945a582..f937f44f1332 100644 --- a/llvm/test/CodeGen/RISCV/rv64p.ll +++ b/llvm/test/CodeGen/RISCV/rv64p.ll @@ -297,8 +297,7 @@ declare i32 @llvm.abs.i32(i32, i1 immarg) define i32 @abs_i32(i32 %x) { ; CHECK-LABEL: abs_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: sext.w a0, a0 -; CHECK-NEXT: abs a0, a0 +; CHECK-NEXT: absw a0, a0 ; CHECK-NEXT: ret %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true) ret i32 %abs @@ -307,8 +306,7 @@ define i32 @abs_i32(i32 %x) { define signext i32 @abs_i32_sext(i32 signext %x) { ; CHECK-LABEL: abs_i32_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: abs a0, a0 -; CHECK-NEXT: sext.w a0, a0 +; CHECK-NEXT: absw a0, a0 ; CHECK-NEXT: ret %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true) ret i32 %abs diff --git a/llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll b/llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll new file mode 100644 index 000000000000..293b48d7dc5d --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll @@ -0,0 +1,39 @@ +;; BB section test with basic block hashes. + +;; basic block sections Profile with bb hashes +; RUN: echo 'v1' > %t +; RUN: echo 'f foo' >> %t +; RUN: echo 'g 0:10,1:9,2:1 1:8,3:8 2:2,3:2 3:11' >> %t +; RUN: echo 'c 0 2 3' >> %t +; RUN: echo 'h 0:64863A11B5CA0000 1:54F1E80D6B270006 2:54F1F4E66B270008 3:C8BC6041A2CB0009' >> %t +; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t | FileCheck %s +; +define void @foo(i1 zeroext) nounwind { + %2 = alloca i8, align 1 + %3 = zext i1 %0 to i8 + store i8 %3, ptr %2, align 1 + %4 = load i8, ptr %2, align 1 + %5 = trunc i8 %4 to i1 + br i1 %5, label %6, label %8 + +6: ; preds = %1 + %7 = call i32 @bar() + br label %10 + +8: ; preds = %1 + %9 = call i32 @baz() + br label %10 + +10: ; preds = %8, %6 + ret void +} + +declare i32 @bar() #1 + +declare i32 @baz() #1 + +; CHECK: .section .text.foo,"ax",@progbits +; CHECK: callq baz +; CHECK: retq +; CHECK: .section .text.split.foo,"ax",@progbits +; CHECK: callq bar diff --git a/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll b/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll index 751ab76722c0..eb0a14b2820b 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll @@ -69,6 +69,20 @@ ; RUN: echo 'g 0:4,1:2:3' >> %t15 ; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t15 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR15 ; CHECK-ERROR15: LLVM ERROR: invalid profile {{.*}} at line 4: unsigned integer expected: '2:3' +; RUN: echo 'v1' > %t16 +; RUN: echo 'f dummy1' >> %t16 +; RUN: echo 'c 0 1' >> %t16 +; RUN: echo 'g 0:4,1:2' >> %t16 +; RUN: echo 'h a:1111111111111111 1:ffffffffffffffff' >> %t16 +; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t16 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR16 +; CHECK-ERROR16: LLVM ERROR: invalid profile {{.*}} at line 5: unsigned integer expected: 'a' +; RUN: echo 'v1' > %t17 +; RUN: echo 'f dummy1' >> %t17 +; RUN: echo 'c 0 1' >> %t17 +; RUN: echo 'g 0:4,1:2' >> %t17 +; RUN: echo 'h 0:111111111111111g 1:ffffffffffffffff' >> %t17 +; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t17 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR17 +; CHECK-ERROR17: LLVM ERROR: invalid profile {{.*}} at line 5: unsigned integer expected in hex format: '111111111111111g' define i32 @dummy1(i32 %x, i32 %y, i32 %z) { diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll index cc3dcf32ac0e..06e7d4773c58 100644 --- a/llvm/test/CodeGen/X86/bittest-big-integer.ll +++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll @@ -1676,3 +1676,291 @@ define i1 @test_ne_i4096(ptr %word, i32 %position) nounwind { %cmp = icmp ne i4096 %test, 0 ret i1 %cmp } + +; Special Cases + +; Multiple uses of the stored value +define i1 @complement_cmpz_i128(ptr %word, i32 %position) nounwind { +; X86-LABEL: complement_cmpz_i128: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $64, %esp +; X86-NEXT: movzbl 12(%ebp), %ecx +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $1, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $12, %al +; X86-NEXT: negb %al +; X86-NEXT: movsbl %al, %esi +; X86-NEXT: movl 36(%esp,%esi), %eax +; X86-NEXT: movl 40(%esp,%esi), %edi +; X86-NEXT: movl %edi, %edx +; X86-NEXT: shldl %cl, %eax, %edx +; X86-NEXT: movl 32(%esp,%esi), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 44(%esp,%esi), %esi +; X86-NEXT: shldl %cl, %edi, %esi +; X86-NEXT: movl %ebx, %edi +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: shldl %cl, %ebx, %eax +; X86-NEXT: movl 8(%ebp), %ecx +; X86-NEXT: xorl 12(%ecx), %esi +; X86-NEXT: xorl 8(%ecx), %edx +; X86-NEXT: xorl 4(%ecx), %eax +; X86-NEXT: xorl (%ecx), %edi +; X86-NEXT: movl %edx, 8(%ecx) +; X86-NEXT: movl %esi, 12(%ecx) +; X86-NEXT: movl %edi, (%ecx) +; X86-NEXT: movl %eax, 4(%ecx) +; X86-NEXT: orl %esi, %eax +; X86-NEXT: orl %edx, %edi +; X86-NEXT: orl %eax, %edi +; X86-NEXT: setne %al +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; SSE-LABEL: complement_cmpz_i128: +; SSE: # %bb.0: +; SSE-NEXT: movl %esi, %ecx +; SSE-NEXT: movl $1, %eax +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: shldq %cl, %rax, %rdx +; SSE-NEXT: shlq %cl, %rax +; SSE-NEXT: xorl %esi, %esi +; SSE-NEXT: testb $64, %cl +; SSE-NEXT: cmovneq %rax, %rdx +; SSE-NEXT: cmovneq %rsi, %rax +; SSE-NEXT: xorq 8(%rdi), %rdx +; SSE-NEXT: xorq (%rdi), %rax +; SSE-NEXT: movq %rax, (%rdi) +; SSE-NEXT: movq %rdx, 8(%rdi) +; SSE-NEXT: orq %rdx, %rax +; SSE-NEXT: setne %al +; SSE-NEXT: retq +; +; AVX2-LABEL: complement_cmpz_i128: +; AVX2: # %bb.0: +; AVX2-NEXT: movl %esi, %ecx +; AVX2-NEXT: movl $1, %eax +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: shldq %cl, %rax, %rdx +; AVX2-NEXT: xorl %esi, %esi +; AVX2-NEXT: shlxq %rcx, %rax, %rax +; AVX2-NEXT: testb $64, %cl +; AVX2-NEXT: cmovneq %rax, %rdx +; AVX2-NEXT: cmovneq %rsi, %rax +; AVX2-NEXT: xorq 8(%rdi), %rdx +; AVX2-NEXT: xorq (%rdi), %rax +; AVX2-NEXT: movq %rax, (%rdi) +; AVX2-NEXT: movq %rdx, 8(%rdi) +; AVX2-NEXT: orq %rdx, %rax +; AVX2-NEXT: setne %al +; AVX2-NEXT: retq +; +; AVX512-LABEL: complement_cmpz_i128: +; AVX512: # %bb.0: +; AVX512-NEXT: movl %esi, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: movl $1, %edx +; AVX512-NEXT: xorl %esi, %esi +; AVX512-NEXT: shldq %cl, %rdx, %rsi +; AVX512-NEXT: shlxq %rcx, %rdx, %rdx +; AVX512-NEXT: testb $64, %cl +; AVX512-NEXT: cmovneq %rdx, %rsi +; AVX512-NEXT: cmovneq %rax, %rdx +; AVX512-NEXT: xorq 8(%rdi), %rsi +; AVX512-NEXT: xorq (%rdi), %rdx +; AVX512-NEXT: movq %rdx, (%rdi) +; AVX512-NEXT: movq %rsi, 8(%rdi) +; AVX512-NEXT: orq %rsi, %rdx +; AVX512-NEXT: setne %al +; AVX512-NEXT: retq + %rem = and i32 %position, 127 + %ofs = zext nneg i32 %rem to i128 + %bit = shl nuw i128 1, %ofs + %ld = load i128, ptr %word + %res = xor i128 %ld, %bit + store i128 %res, ptr %word + %cmp = icmp ne i128 %res, 0 + ret i1 %cmp +} + +; Multiple loads in store chain +define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind { +; X86-LABEL: reset_multiload_i128: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $64, %esp +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $1, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $12, %al +; X86-NEXT: negb %al +; X86-NEXT: movsbl %al, %edi +; X86-NEXT: movl 36(%esp,%edi), %edx +; X86-NEXT: movl 40(%esp,%edi), %ebx +; X86-NEXT: movl %ebx, %esi +; X86-NEXT: shldl %cl, %edx, %esi +; X86-NEXT: movl 32(%esp,%edi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 44(%esp,%edi), %edi +; X86-NEXT: shldl %cl, %ebx, %edi +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, %ebx +; X86-NEXT: notl %ebx +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl (%eax), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: andl $96, %eax +; X86-NEXT: shrl $3, %eax +; X86-NEXT: movl 8(%ebp), %ecx +; X86-NEXT: movl (%ecx,%eax), %eax +; X86-NEXT: andl %ebx, (%ecx) +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: shldl %cl, %ebx, %edx +; X86-NEXT: notl %edx +; X86-NEXT: movl 8(%ebp), %ebx +; X86-NEXT: andl %edx, 4(%ebx) +; X86-NEXT: notl %esi +; X86-NEXT: andl %esi, 8(%ebx) +; X86-NEXT: notl %edi +; X86-NEXT: andl %edi, 12(%ebx) +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: jae .LBB22_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: .LBB22_2: +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; SSE-LABEL: reset_multiload_i128: +; SSE: # %bb.0: +; SSE-NEXT: movl %esi, %ecx +; SSE-NEXT: movl $1, %esi +; SSE-NEXT: xorl %r8d, %r8d +; SSE-NEXT: shldq %cl, %rsi, %r8 +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: shlq %cl, %rsi +; SSE-NEXT: testb $64, %cl +; SSE-NEXT: cmovneq %rsi, %r8 +; SSE-NEXT: cmovneq %rax, %rsi +; SSE-NEXT: notq %r8 +; SSE-NEXT: notq %rsi +; SSE-NEXT: movl %ecx, %r9d +; SSE-NEXT: andl $96, %r9d +; SSE-NEXT: shrl $3, %r9d +; SSE-NEXT: movl (%rdi,%r9), %r9d +; SSE-NEXT: btl %ecx, %r9d +; SSE-NEXT: jb .LBB22_2 +; SSE-NEXT: # %bb.1: +; SSE-NEXT: movl (%rdx), %eax +; SSE-NEXT: .LBB22_2: +; SSE-NEXT: andq %r8, 8(%rdi) +; SSE-NEXT: andq %rsi, (%rdi) +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: reset_multiload_i128: +; AVX2: # %bb.0: +; AVX2-NEXT: movl %esi, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: movl $1, %r8d +; AVX2-NEXT: xorl %esi, %esi +; AVX2-NEXT: shldq %cl, %r8, %rsi +; AVX2-NEXT: shlxq %rcx, %r8, %r8 +; AVX2-NEXT: testb $64, %cl +; AVX2-NEXT: cmovneq %r8, %rsi +; AVX2-NEXT: cmovneq %rax, %r8 +; AVX2-NEXT: notq %rsi +; AVX2-NEXT: notq %r8 +; AVX2-NEXT: movl %ecx, %r9d +; AVX2-NEXT: andl $96, %r9d +; AVX2-NEXT: shrl $3, %r9d +; AVX2-NEXT: movl (%rdi,%r9), %r9d +; AVX2-NEXT: btl %ecx, %r9d +; AVX2-NEXT: jb .LBB22_2 +; AVX2-NEXT: # %bb.1: +; AVX2-NEXT: movl (%rdx), %eax +; AVX2-NEXT: .LBB22_2: +; AVX2-NEXT: andq %rsi, 8(%rdi) +; AVX2-NEXT: andq %r8, (%rdi) +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: reset_multiload_i128: +; AVX512: # %bb.0: +; AVX512-NEXT: movl %esi, %ecx +; AVX512-NEXT: movl $1, %r8d +; AVX512-NEXT: xorl %esi, %esi +; AVX512-NEXT: shldq %cl, %r8, %rsi +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: shlxq %rcx, %r8, %r8 +; AVX512-NEXT: testb $64, %cl +; AVX512-NEXT: cmovneq %r8, %rsi +; AVX512-NEXT: cmovneq %rax, %r8 +; AVX512-NEXT: notq %rsi +; AVX512-NEXT: notq %r8 +; AVX512-NEXT: movl %ecx, %r9d +; AVX512-NEXT: andl $96, %r9d +; AVX512-NEXT: shrl $3, %r9d +; AVX512-NEXT: movl (%rdi,%r9), %r9d +; AVX512-NEXT: btl %ecx, %r9d +; AVX512-NEXT: jb .LBB22_2 +; AVX512-NEXT: # %bb.1: +; AVX512-NEXT: movl (%rdx), %eax +; AVX512-NEXT: .LBB22_2: +; AVX512-NEXT: andq %rsi, 8(%rdi) +; AVX512-NEXT: andq %r8, (%rdi) +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %rem = and i32 %position, 127 + %ofs = zext nneg i32 %rem to i128 + %bit = shl nuw i128 1, %ofs + %mask = xor i128 %bit, -1 + %ld = load i128, ptr %word + %sel = load i32, ptr %p + %test = and i128 %ld, %bit + %res = and i128 %ld, %mask + %cmp = icmp eq i128 %test, 0 + store i128 %res, ptr %word + %ret = select i1 %cmp, i32 %sel, i32 0 + ret i32 %ret +} diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s index d85ea799ed3d..399a6441629c 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s @@ -1,8 +1,8 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding -comment-column=0 %s | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-ASM %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | sed -n 's#.*\(\[0x[0-9a-fx,]\{1,\}\]\)#\1#p' | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding -comment-column=0 | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding -comment-column=0 | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding -comment-column=0 %s | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-ASM %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | sed -n 's#.*\(\[0x[0-9a-fx,]\{1,\}\]\)#\1#p' | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding -comment-column=0 | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding -comment-column=0 | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s v_bfrev_b32_e32 v5, v1 // GFX12: v_bfrev_b32_e32 v5, v1 ; encoding: [0x01,0x71,0x0a,0x7e] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll index 7f345133f51d..68cfc659e1e9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll @@ -660,6 +660,114 @@ exit: ret i32 %red } + +define i32 @test_or_reduction_with_stride_2(i32 %scale, ptr %src) { +; CHECK-LABEL: define i32 @test_or_reduction_with_stride_2( +; CHECK-SAME: i32 [[SCALE:%.*]], ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[SCALE]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP66:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 10 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 12 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 14 +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 16 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 18 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 20 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 22 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 24 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26 +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28 +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP14]] +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP32:%.*]] = load i8, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP17]], align 1 +; CHECK-NEXT: [[TMP34:%.*]] = load i8, ptr [[TMP18]], align 1 +; CHECK-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP19]], align 1 +; CHECK-NEXT: [[TMP36:%.*]] = load i8, ptr [[TMP20]], align 1 +; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[TMP21]], align 1 +; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP40:%.*]] = load i8, ptr [[TMP24]], align 1 +; CHECK-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP25]], align 1 +; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr [[TMP26]], align 1 +; CHECK-NEXT: [[TMP43:%.*]] = load i8, ptr [[TMP27]], align 1 +; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr [[TMP28]], align 1 +; CHECK-NEXT: [[TMP45:%.*]] = load i8, ptr [[TMP29]], align 1 +; CHECK-NEXT: [[TMP46:%.*]] = load i8, ptr [[TMP30]], align 1 +; CHECK-NEXT: [[TMP47:%.*]] = load i8, ptr [[TMP31]], align 1 +; CHECK-NEXT: [[TMP48:%.*]] = insertelement <16 x i8> poison, i8 [[TMP32]], i32 0 +; CHECK-NEXT: [[TMP49:%.*]] = insertelement <16 x i8> [[TMP48]], i8 [[TMP33]], i32 1 +; CHECK-NEXT: [[TMP50:%.*]] = insertelement <16 x i8> [[TMP49]], i8 [[TMP34]], i32 2 +; CHECK-NEXT: [[TMP51:%.*]] = insertelement <16 x i8> [[TMP50]], i8 [[TMP35]], i32 3 +; CHECK-NEXT: [[TMP52:%.*]] = insertelement <16 x i8> [[TMP51]], i8 [[TMP36]], i32 4 +; CHECK-NEXT: [[TMP53:%.*]] = insertelement <16 x i8> [[TMP52]], i8 [[TMP37]], i32 5 +; CHECK-NEXT: [[TMP54:%.*]] = insertelement <16 x i8> [[TMP53]], i8 [[TMP38]], i32 6 +; CHECK-NEXT: [[TMP55:%.*]] = insertelement <16 x i8> [[TMP54]], i8 [[TMP39]], i32 7 +; CHECK-NEXT: [[TMP56:%.*]] = insertelement <16 x i8> [[TMP55]], i8 [[TMP40]], i32 8 +; CHECK-NEXT: [[TMP57:%.*]] = insertelement <16 x i8> [[TMP56]], i8 [[TMP41]], i32 9 +; CHECK-NEXT: [[TMP58:%.*]] = insertelement <16 x i8> [[TMP57]], i8 [[TMP42]], i32 10 +; CHECK-NEXT: [[TMP59:%.*]] = insertelement <16 x i8> [[TMP58]], i8 [[TMP43]], i32 11 +; CHECK-NEXT: [[TMP60:%.*]] = insertelement <16 x i8> [[TMP59]], i8 [[TMP44]], i32 12 +; CHECK-NEXT: [[TMP61:%.*]] = insertelement <16 x i8> [[TMP60]], i8 [[TMP45]], i32 13 +; CHECK-NEXT: [[TMP62:%.*]] = insertelement <16 x i8> [[TMP61]], i8 [[TMP46]], i32 14 +; CHECK-NEXT: [[TMP63:%.*]] = insertelement <16 x i8> [[TMP62]], i8 [[TMP47]], i32 15 +; CHECK-NEXT: [[TMP64:%.*]] = sext <16 x i8> [[TMP63]] to <16 x i32> +; CHECK-NEXT: [[TMP65:%.*]] = mul <16 x i32> [[BROADCAST_SPLAT]], [[TMP64]] +; CHECK-NEXT: [[TMP66]] = or <16 x i32> [[TMP65]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-NEXT: [[TMP67:%.*]] = icmp eq i64 [[INDEX_NEXT]], 48 +; CHECK-NEXT: br i1 [[TMP67]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP68:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP66]]) +; CHECK-NEXT: br label %[[SCALAR_PH:.*]] +; CHECK: [[SCALAR_PH]]: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] + %reduction = phi i32 [ %reduction.next, %loop ], [ 0, %entry ] + %gep = getelementptr [32 x i8], ptr %src, i64 %iv + %load = load i8, ptr %gep, align 1 + %sext = sext i8 %load to i32 + %mul = mul i32 %scale, %sext + %reduction.next = or i32 %mul, %reduction + %iv.next = add i64 %iv, 2 + %cmp = icmp eq i64 %iv.next, 100 + br i1 %cmp, label %exit, label %loop + +exit: + ret i32 %reduction.next +} + attributes #0 = { "target-cpu"="neoverse-512tvb" } !0 = !{!1, !2, i64 0} diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll new file mode 100644 index 000000000000..9cc417f6b874 --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll @@ -0,0 +1,89 @@ +; RUN: split-file %s %t +; RUN: cat %t/main.ll %t/probable-or.prof > %t/probable-or.ll +; RUN: cat %t/main.ll %t/probable-and.prof > %t/probable-and.ll +; RUN: opt -passes='loop(simple-loop-unswitch<nontrivial>)' -S %t/probable-or.ll -o -| FileCheck %t/probable-or.prof +; RUN: opt -passes='loop(simple-loop-unswitch<nontrivial>)' -S %t/probable-and.ll -o -| FileCheck %t/probable-and.prof + +;--- main.ll +declare i32 @a() +declare i32 @b() + +define i32 @or(ptr %ptr, i1 %cond) !prof !0 { +entry: + br label %loop_begin + +loop_begin: + %v1 = load i1, ptr %ptr + %cond_or = or i1 %v1, %cond + br i1 %cond_or, label %loop_a, label %loop_b, !prof !1 + +loop_a: + call i32 @a() + br label %latch + +loop_b: + call i32 @b() + br label %latch + +latch: + %v2 = load i1, ptr %ptr + br i1 %v2, label %loop_begin, label %loop_exit, !prof !2 + +loop_exit: + ret i32 0 +} + +define i32 @and(ptr %ptr, i1 %cond) !prof !0 { +entry: + br label %loop_begin + +loop_begin: + %v1 = load i1, ptr %ptr + %cond_and = and i1 %v1, %cond + br i1 %cond_and, label %loop_a, label %loop_b, !prof !1 + +loop_a: + call i32 @a() + br label %latch + +loop_b: + call i32 @b() + br label %latch + +latch: + %v2 = load i1, ptr %ptr + br i1 %v2, label %loop_begin, label %loop_exit, !prof !2 + +loop_exit: + ret i32 0 +} + +;--- probable-or.prof +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 1, i32 1000} +!2 = !{!"branch_weights", i32 5, i32 7} +; CHECK-LABEL: @or +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond.fr = freeze i1 %cond +; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split, !prof !1 +; CHECK-LABEL: @and +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond.fr = freeze i1 %cond +; CHECK-NEXT: br i1 %cond.fr, label %entry.split, label %entry.split.us, !prof !3 +; CHECK: !1 = !{!"branch_weights", i32 1, i32 1000} +; CHECK: !3 = !{!"unknown", !"simple-loop-unswitch"} + +;--- probable-and.prof +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 1000, i32 1} +!2 = !{!"branch_weights", i32 5, i32 7} +; CHECK-LABEL: @or +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond.fr = freeze i1 %cond +; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split, !prof !1 +; CHECK-LABEL: @and +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond.fr = freeze i1 %cond +; CHECK-NEXT: br i1 %cond.fr, label %entry.split, label %entry.split.us, !prof !3 +; CHECK: !1 = !{!"unknown", !"simple-loop-unswitch"} +; CHECK: !3 = !{!"branch_weights", i32 1000, i32 1} diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll b/llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll index 0964c55d1dec..3760be4b26f2 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt < %s -simple-loop-unswitch-inject-invariant-conditions=true -passes='loop(simple-loop-unswitch<nontrivial>,loop-instsimplify)' -S | FileCheck %s define void @test() { @@ -7,7 +7,7 @@ define void @test() { ; CHECK-NEXT: [[TMP:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) poison unordered, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, ptr addrspace(1) poison unordered, align 8 -; CHECK-NEXT: br i1 [[TMP]], label [[BB_SPLIT:%.*]], label [[BB3_SPLIT_US:%.*]] +; CHECK-NEXT: br i1 [[TMP]], label [[BB_SPLIT:%.*]], label [[BB3_SPLIT_US:%.*]], !prof [[PROF0:![0-9]+]] ; CHECK: bb.split: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: @@ -19,7 +19,7 @@ define void @test() { ; CHECK-NEXT: [[TMP6_US:%.*]] = phi i32 [ poison, [[BB3_SPLIT_US]] ] ; CHECK-NEXT: [[TMP7_US:%.*]] = add nuw nsw i32 [[TMP6_US]], 2 ; CHECK-NEXT: [[TMP8_US:%.*]] = icmp ult i32 [[TMP7_US]], [[TMP2]] -; CHECK-NEXT: br i1 [[TMP8_US]], label [[BB9_US:%.*]], label [[BB16_SPLIT_US:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8_US]], label [[BB9_US:%.*]], label [[BB16_SPLIT_US:%.*]], !prof [[PROF0]] ; CHECK: bb9.us: ; CHECK-NEXT: br label [[BB17_SPLIT_US:%.*]] ; CHECK: bb16.split.us: @@ -96,3 +96,8 @@ declare i1 @llvm.experimental.widenable.condition() !0 = !{!"branch_weights", i32 1048576, i32 1} +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(inaccessiblemem: readwrite) } +;. +; CHECK: [[PROF0]] = !{!"branch_weights", i32 1048576, i32 1} +;. diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll b/llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll new file mode 100644 index 000000000000..ec6baa5b3772 --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll @@ -0,0 +1,157 @@ +; RUN: split-file %s %t +; RUN: cat %t/main.ll %t/probable-or.prof > %t/probable-or.ll +; RUN: cat %t/main.ll %t/probable-and.prof > %t/probable-and.ll +; RUN: opt -passes='loop-mssa(simple-loop-unswitch)' -S %t/probable-or.ll -o - | FileCheck %t/probable-or.prof +; RUN: opt -passes='loop-mssa(simple-loop-unswitch)' -S %t/probable-and.ll -o - | FileCheck %t/probable-and.prof +; +; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \ +; RUN: %t/probable-or.ll -disable-output -simple-loop-unswitch-estimate-profile=0 2>&1 | FileCheck %t/probable-or.prof --check-prefixes=PROFILE-COM,PROFILE-REF + +; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \ +; RUN: %t/probable-or.ll -disable-output -simple-loop-unswitch-estimate-profile=1 2>&1 | FileCheck %t/probable-or.prof --check-prefixes=PROFILE-COM,PROFILE-CHK + +; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \ +; RUN: %t/probable-and.ll -disable-output -simple-loop-unswitch-estimate-profile=0 2>&1 | FileCheck %t/probable-and.prof --check-prefixes=PROFILE-COM,PROFILE-REF + +; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \ +; RUN: %t/probable-and.ll -disable-output -simple-loop-unswitch-estimate-profile=1 2>&1 | FileCheck %t/probable-and.prof --check-prefixes=PROFILE-COM,PROFILE-CHK + +;--- main.ll +declare void @some_func() noreturn + +define i32 @or(i1 %cond1, i32 %var1) !prof !0 { +entry: + br label %loop_begin + +loop_begin: + %var3 = phi i32 [%var1, %entry], [%var2, %do_something] + %cond2 = icmp eq i32 %var3, 10 + %cond.or = or i1 %cond1, %cond2 + br i1 %cond.or, label %loop_exit, label %do_something, !prof !1 + +do_something: + %var2 = add i32 %var3, 1 + call void @some_func() noreturn nounwind + br label %loop_begin + +loop_exit: + ret i32 0 +} + +define i32 @and(i1 %cond1, i32 %var1) !prof !0 { +entry: + br label %loop_begin + +loop_begin: + %var3 = phi i32 [%var1, %entry], [%var2, %do_something] + %cond2 = icmp eq i32 %var3, 10 + %cond.and = and i1 %cond1, %cond2 + br i1 %cond.and, label %do_something, label %loop_exit, !prof !1 + +do_something: + %var2 = add i32 %var3, 1 + call void @some_func() noreturn nounwind + br label %loop_begin + +loop_exit: + ret i32 0 +} + +;--- probable-or.prof +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 1, i32 1000} +; CHECK-LABEL: @or +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond1.fr = freeze i1 %cond1 +; CHECK-NEXT: br i1 %cond1.fr, label %loop_exit.split, label %entry.split, !prof !1 +; CHECK-LABEL: @and +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond1.fr = freeze i1 %cond1 +; CHECK-NEXT: br i1 %cond1.fr, label %entry.split, label %loop_exit.split, !prof !2 +; CHECK: !1 = !{!"branch_weights", i32 1, i32 1000} +; CHECK: !2 = !{!"unknown", !"simple-loop-unswitch"} + +; PROFILE-COM: Printing analysis results of BFI for function 'or': +; PROFILE-COM: block-frequency-info: or + ; PROFILE-COM: - entry: {{.*}} count = 10 + ; PROFILE-COM: - loop_begin: {{.*}} count = 10010 + ; PROFILE-COM: - do_something: {{.*}} count = 10000 + ; PROFILE-COM: - loop_exit: {{.*}} count = 10 + +; PROFILE-COM: Printing analysis results of BFI for function 'and': +; PROFILE-COM: block-frequency-info: and + ; PROFILE-COM: - entry: {{.*}} count = 10 + ; PROFILE-COM: - loop_begin: {{.*}} count = 10 + ; PROFILE-COM: - do_something: {{.*}} count = 0 + ; PROFILE-COM: - loop_exit: {{.*}} count = 10 + +; PROFILE-COM: Printing analysis results of BFI for function 'or': +; PROFILE-COM: block-frequency-info: or + ; PROFILE-COM: - entry: {{.*}} count = 10 + ; PROFILE-REF: - entry.split: {{.*}} count = 5 + ; PROFILE-CHK: - entry.split: {{.*}} count = 10 + ; PROFILE-REF: - loop_begin: {{.*}} count = 5005 + ; PROFILE-CHK: - loop_begin: {{.*}} count = 10000 + ; PROFILE-REF: - do_something: {{.*}} count = 5000 + ; PROFILE-CHK: - do_something: {{.*}} count = 9990 + ; PROFILE-REF: - loop_exit: {{.*}} count = 5 + ; PROFILE-CHK: - loop_exit: {{.*}} count = 10 + ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10 + +; PROFILE-COM: Printing analysis results of BFI for function 'and': +; PROFILE-COM: block-frequency-info: and + ; PROFILE-COM: - entry: {{.*}} count = 10 + ; PROFILE-COM: - entry.split: {{.*}} count = 5 + ; PROFILE-COM: - loop_begin: {{.*}} count = 5 + ; PROFILE-COM: - do_something: {{.*}} count = 0 + ; PROFILE-COM: - loop_exit: {{.*}} count = 5 + ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10 + +;--- probable-and.prof +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 1000, i32 1} +; CHECK-LABEL: @or +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond1.fr = freeze i1 %cond1 +; CHECK-NEXT: br i1 %cond1.fr, label %loop_exit.split, label %entry.split, !prof !1 +; CHECK-LABEL: @and +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond1.fr = freeze i1 %cond1 +; CHECK-NEXT: br i1 %cond1.fr, label %entry.split, label %loop_exit.split, !prof !2 +; CHECK: !1 = !{!"unknown", !"simple-loop-unswitch"} +; CHECK: !2 = !{!"branch_weights", i32 1000, i32 1} +; PROFILE-COM: Printing analysis results of BFI for function 'or': +; PROFILE-COM: block-frequency-info: or + ; PROFILE-COM: - entry: {{.*}}, count = 10 + ; PROFILE-COM: - loop_begin: {{.*}}, count = 10 + ; PROFILE-COM: - do_something: {{.*}}, count = 0 + ; PROFILE-COM: - loop_exit: {{.*}}, count = 10 + +; PROFILE-COM: Printing analysis results of BFI for function 'and': +; PROFILE-COM: block-frequency-info: and + ; PROFILE-COM: - entry: {{.*}} count = 10 + ; PROFILE-COM: - loop_begin: {{.*}} count = 10010 + ; PROFILE-COM: - do_something: {{.*}} count = 10000 + ; PROFILE-COM: - loop_exit: {{.*}} count = 10 + +; PROFILE-COM: Printing analysis results of BFI for function 'or': +; PROFILE-COM: block-frequency-info: or + ; PROFILE-COM: - entry: {{.*}} count = 10 + ; PROFILE-COM: - entry.split: {{.*}} count = 5 + ; PROFILE-COM: - loop_begin: {{.*}} count = 5 + ; PROFILE-COM: - do_something: {{.*}} count = 0 + ; PROFILE-COM: - loop_exit: {{.*}} count = 5 + ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10 + +; PROFILE-COM: Printing analysis results of BFI for function 'and': +; PROFILE-COM: block-frequency-info: and + ; PROFILE-COM: - entry: {{.*}} count = 10 + ; PROFILE-REF: - entry.split: {{.*}} count = 5 + ; PROFILE-CHK: - entry.split: {{.*}} count = 10 + ; PROFILE-REF: - loop_begin: {{.*}} count = 5005 + ; PROFILE-CHK: - loop_begin: {{.*}} count = 10000 + ; PROFILE-REF: - do_something: {{.*}} count = 5000 + ; PROFILE-CHK: - do_something: {{.*}} count = 9990 + ; PROFILE-REF: - loop_exit: {{.*}} count = 5 + ; PROFILE-CHK: - loop_exit: {{.*}} count = 10 + ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10 diff --git a/llvm/test/Transforms/SimplifyCFG/pr165301.ll b/llvm/test/Transforms/SimplifyCFG/pr165301.ll new file mode 100644 index 000000000000..4a539d77af3c --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/pr165301.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes="simplifycfg<switch-range-to-icmp>" < %s | FileCheck %s + +; Make sure there's no use after free when removing incoming values from PHI nodes + +define i32 @pr165301(i1 %cond) { +; CHECK-LABEL: define i32 @pr165301( +; CHECK-SAME: i1 [[COND:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[SWITCHBB:.*]] +; CHECK: [[SWITCHBB]]: +; CHECK-NEXT: br label %[[SWITCHBB]] +; +entry: + br label %switchbb + +switchbb: + switch i1 %cond, label %default [ + i1 false, label %switchbb + i1 true, label %switchbb + ] + +default: + %phi.lcssa = phi i32 [ 0, %switchbb ] + ret i32 %phi.lcssa +} diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-feature-warning.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-feature-warning.test new file mode 100644 index 000000000000..24726c34d350 --- /dev/null +++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-feature-warning.test @@ -0,0 +1,37 @@ +## This test checks that we output a warning when the specified version is too old to support the given features. + +# RUN: yaml2obj %s -o %t +# RUN: llvm-readobj --bb-addr-map %t 2>&1 | FileCheck -DFILE=%t %s + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + +# CHECK: BBAddrMap [ +# CHECK-NEXT: warning: '[[FILE]]': unable to dump SHT_LLVM_BB_ADDR_MAP section with index 1: version should be >= 3 for SHT_LLVM_BB_ADDR_MAP when callsite offsets feature is enabled: version = 2 feature = 32 +Sections: + - Name: '.llvm_bb_addr_map (1)' + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 2 + Feature: 0x20 + +# CHECK: BBAddrMap [ +# CHECK-NEXT: warning: '[[FILE]]': unable to dump SHT_LLVM_BB_ADDR_MAP section with index 2: version should be >= 4 for SHT_LLVM_BB_ADDR_MAP when basic block hash feature is enabled: version = 3 feature = 64 + + - Name: '.llvm_bb_addr_map (2)' + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 3 + Feature: 0x40 + +# CHECK: BBAddrMap [ +# CHECK-NEXT: warning: '[[FILE]]': unable to dump SHT_LLVM_BB_ADDR_MAP section with index 3: version should be >= 5 for SHT_LLVM_BB_ADDR_MAP when post link cfg feature is enabled: version = 4 feature = 128 + + - Name: '.llvm_bb_addr_map (3)' + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 4 + Feature: 0x80 diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test index 5faafd4d83b2..8e9d2271b872 100644 --- a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test +++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test @@ -15,7 +15,7 @@ ## Check that a malformed section can be handled. # RUN: yaml2obj %s -DBITS=32 -DSIZE=24 -o %t2.o -# RUN: llvm-readobj %t2.o --bb-addr-map 2>&1 | FileCheck --match-full-lines %s -DOFFSET=0x00000018 -DFILE=%t2.o --check-prefix=TRUNCATED +# RUN: llvm-readobj %t2.o --bb-addr-map 2>&1 | FileCheck --match-full-lines %s -DOFFSET=0x00000015 -DFILE=%t2.o --check-prefix=TRUNCATED ## Check that missing features can be handled. # RUN: yaml2obj %s -DBITS=32 -DFEATURE=0x2 -o %t3.o @@ -59,17 +59,20 @@ # CHECK-NEXT: { # RAW-NEXT: Frequency: 100 # PRETTY-NEXT: Frequency: 1.0 +# CHECK-NEXT: PostLink Frequency: 10 # CHECK-NEXT: Successors [ # CHECK-NEXT: { # CHECK-NEXT: ID: 2 # RAW-NEXT: Probability: 0x80000000 # PRETTY-NEXT: Probability: 0x80000000 / 0x80000000 = 100.00% +# CHECK-NEXT: PostLink Probability: 7 # CHECK-NEXT: } # CHECK-NEXT: ] # CHECK-NEXT: } # CHECK-NEXT: { # RAW-NEXT: Frequency: 100 # PRETTY-NEXT: Frequency: 1.0 +# CHECK-NEXT: PostLink Frequency: 0 # CHECK-NEXT: Successors [ # CHECK-NEXT: ] # CHECK-NEXT: } @@ -172,8 +175,8 @@ Sections: ShSize: [[SIZE=<none>]] Link: .text Entries: - - Version: 2 - Feature: 0x7 + - Version: 5 + Feature: 0x87 BBRanges: - BaseAddress: [[ADDR=0x11111]] BBEntries: @@ -197,10 +200,12 @@ Sections: PGOAnalyses: - FuncEntryCount: 100 PGOBBEntries: - - BBFreq: 100 + - BBFreq: 100 + PostLinkBBFreq: 10 Successors: - - ID: 2 - BrProb: 0x80000000 + - ID: 2 + BrProb: 0x80000000 + PostLinkBrFreq: 7 - BBFreq: 100 Successors: [] - FuncEntryCount: 8888 diff --git a/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml b/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml index 299bf463cf4b..645507af080c 100644 --- a/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml +++ b/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml @@ -15,7 +15,7 @@ # VALID-NEXT: Type: SHT_LLVM_BB_ADDR_MAP # VALID-NEXT: Entries: # VALID-NEXT: - Version: 2 -# VALID-NEXT: Feature: 0x7 +# VALID-NEXT: Feature: 0x87 ## The 'BaseAddress' field is omitted when it's zero. # VALID-NEXT: BBRanges: # VALID-NEXT: - BBEntries: @@ -43,17 +43,23 @@ # VALID-NEXT: PGOAnalyses: # VALID-NEXT: - FuncEntryCount: 100 # VALID-NEXT: PGOBBEntries: -# VALID-NEXT: - BBFreq: 100 +# VALID-NEXT: - BBFreq: 100 +# VALID-NEXT: PostLinkBBFreq: 10 # VALID-NEXT: Successors: -# VALID-NEXT: - ID: 2 -# VALID-NEXT: BrProb: 0x80000000 -# VALID-NEXT: - ID: 4 -# VALID-NEXT: BrProb: 0x80000000 -# VALID-NEXT: - BBFreq: 50 +# VALID-NEXT: - ID: 2 +# VALID-NEXT: BrProb: 0x80000000 +# VALID-NEXT: PostLinkBrFreq: 7 +# VALID-NEXT: - ID: 4 +# VALID-NEXT: BrProb: 0x80000000 +# VALID-NEXT: PostLinkBrFreq: 0 +# VALID-NEXT: - BBFreq: 50 +# VALID-NEXT: PostLinkBBFreq: 0 # VALID-NEXT: Successors: -# VALID-NEXT: - ID: 4 -# VALID-NEXT: BrProb: 0xFFFFFFFF -# VALID-NEXT: - BBFreq: 100 +# VALID-NEXT: - ID: 4 +# VALID-NEXT: BrProb: 0xFFFFFFFF +# VALID-NEXT: PostLinkBrFreq: 0 +# VALID-NEXT: - BBFreq: 100 +# VALID-NEXT: PostLinkBBFreq: 3 # VALID-NEXT: Successors: [] # VALID-NEXT: PGOBBEntries: # VALID-NEXT: - BBFreq: 20 @@ -69,7 +75,7 @@ Sections: ShSize: [[SIZE=<none>]] Entries: - Version: 2 - Feature: 0x7 + Feature: 0x87 BBRanges: - BaseAddress: 0x0 BBEntries: @@ -97,17 +103,20 @@ Sections: PGOAnalyses: - FuncEntryCount: 100 PGOBBEntries: - - BBFreq: 100 + - BBFreq: 100 + PostLinkBBFreq: 10 Successors: - - ID: 2 - BrProb: 0x80000000 - - ID: 4 - BrProb: 0x80000000 - - BBFreq: 50 + - ID: 2 + BrProb: 0x80000000 + PostLinkBrFreq: 7 + - ID: 4 + BrProb: 0x80000000 + - BBFreq: 50 Successors: - - ID: 4 - BrProb: 0xFFFFFFFF - - BBFreq: 100 + - ID: 4 + BrProb: 0xFFFFFFFF + - BBFreq: 100 + PostLinkBBFreq: 3 Successors: [] - PGOBBEntries: - BBFreq: 20 diff --git a/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml b/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml index a4cb572e6d99..ac9c8d402b0a 100644 --- a/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml +++ b/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml @@ -6,8 +6,9 @@ # Case 4: Specify Entries. # CHECK: Name: .llvm_bb_addr_map (1) # CHECK: SectionData ( -# CHECK-NEXT: 0000: 02072000 00000000 0000010B 010203E8 -# CHECK-NEXT: 0010: 07E80702 0CEEDDBB F70E0D91 A2C48801 +# CHECK-NEXT: 0000: 02872000 00000000 0000010B 010203E8 +# CHECK-NEXT: 0010: 07E80764 020CEEDD BBF70E28 0D91A2C4 +# CHECK-NEXT: 0020: 880100 # CHECK-NEXT: ) # Case 7: Not including a field which is enabled in feature doesn't emit value @@ -26,12 +27,12 @@ Sections: ## Test the following cases: ## 1) We can produce an .llvm_bb_addr_map section from a description with -## Entries and PGO Analysis data. +## Entries and PGO Analysis and Post Link data. - Name: '.llvm_bb_addr_map (1)' Type: SHT_LLVM_BB_ADDR_MAP Entries: - Version: 2 - Feature: 0x7 + Feature: 0x87 BBRanges: - BaseAddress: 0x0000000000000020 BBEntries: @@ -42,12 +43,14 @@ Sections: PGOAnalyses: - FuncEntryCount: 1000 PGOBBEntries: - - BBFreq: 1000 + - BBFreq: 1000 + PostLinkBBFreq: 100 Successors: - - ID: 12 - BrProb: 0xeeeeeeee - - ID: 13 - BrProb: 0x11111111 + - ID: 12 + BrProb: 0xeeeeeeee + PostLinkBrFreq: 40 + - ID: 13 + BrProb: 0x11111111 ## 2) According to feature we have FuncEntryCount but none is provided in yaml - Name: '.llvm_bb_addr_map (2)' @@ -66,7 +69,7 @@ Sections: ## Check that yaml2obj generates a warning when we use unsupported feature. # RUN: yaml2obj --docnum=2 %s 2>&1 | FileCheck %s --check-prefix=INVALID-FEATURE -# INVALID-FEATURE: warning: invalid encoding for BBAddrMap::Features: 0xf0 +# INVALID-FEATURE: warning: invalid encoding for BBAddrMap::Features: 0x100 --- !ELF FileHeader: @@ -79,4 +82,4 @@ Sections: Entries: - Version: 2 ## Specify unsupported feature - Feature: 0xF0 + Feature: 0x100 diff --git a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml index 339e419b3945..05d77d67e446 100644 --- a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml +++ b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml @@ -220,7 +220,7 @@ Sections: ## Check that yaml2obj generates a warning when we use unsupported versions. # RUN: yaml2obj --docnum=3 %s 2>&1 | FileCheck %s --check-prefix=INVALID-VERSION -# INVALID-VERSION: warning: unsupported SHT_LLVM_BB_ADDR_MAP version: 5; encoding using the most recent version +# INVALID-VERSION: warning: unsupported SHT_LLVM_BB_ADDR_MAP version: 6; encoding using the most recent version --- !ELF FileHeader: @@ -232,4 +232,4 @@ Sections: Type: SHT_LLVM_BB_ADDR_MAP Entries: ## Specify unsupported version - - Version: 5 + - Version: 6 diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 423a11fd5b72..6f09da5a4099 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -8188,6 +8188,8 @@ void LLVMELFDumper<ELFT>::printBBAddrMaps(bool PrettyPGOAnalysis) { } else { W.printNumber("Frequency", PBBE.BlockFreq.getFrequency()); } + if (PAM.FeatEnable.PostLinkCfg) + W.printNumber("PostLink Frequency", PBBE.PostLinkBlockFreq); } if (PAM.FeatEnable.BrProb) { @@ -8200,6 +8202,8 @@ void LLVMELFDumper<ELFT>::printBBAddrMaps(bool PrettyPGOAnalysis) { } else { W.printHex("Probability", Succ.Prob.getNumerator()); } + if (PAM.FeatEnable.PostLinkCfg) + W.printNumber("PostLink Probability", Succ.PostLinkFreq); } } } diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index 68e18f6c7920..4364d15a8b45 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -895,7 +895,7 @@ ELFDumper<ELFT>::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { std::vector<ELFYAML::PGOAnalysisMapEntry> PGOAnalyses; DataExtractor::Cursor Cur(0); uint8_t Version = 0; - uint8_t Feature = 0; + uint16_t Feature = 0; uint64_t Address = 0; while (Cur && Cur.tell() < Content.size()) { if (Shdr->sh_type == ELF::SHT_LLVM_BB_ADDR_MAP) { @@ -905,7 +905,7 @@ ELFDumper<ELFT>::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { errc::invalid_argument, "invalid SHT_LLVM_BB_ADDR_MAP section version: " + Twine(static_cast<int>(Version))); - Feature = Data.getU8(Cur); + Feature = Version < 5 ? Data.getU8(Cur) : Data.getU16(Cur); } uint64_t NumBBRanges = 1; uint64_t NumBlocks = 0; @@ -972,6 +972,8 @@ ELFDumper<ELFT>::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { auto &PGOBBEntry = PGOBBEntries.emplace_back(); if (FeatureOrErr->BBFreq) { PGOBBEntry.BBFreq = Data.getULEB128(Cur); + if (FeatureOrErr->PostLinkCfg) + PGOBBEntry.PostLinkBBFreq = Data.getULEB128(Cur); if (!Cur) break; } @@ -982,7 +984,10 @@ ELFDumper<ELFT>::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { for (uint64_t SuccIdx = 0; Cur && SuccIdx < SuccCount; ++SuccIdx) { uint32_t ID = Data.getULEB128(Cur); uint32_t BrProb = Data.getULEB128(Cur); - SuccEntries.push_back({ID, BrProb}); + std::optional<uint32_t> PostLinkBrFreq; + if (FeatureOrErr->PostLinkCfg) + PostLinkBrFreq = Data.getULEB128(Cur); + SuccEntries.push_back({ID, BrProb, PostLinkBrFreq}); } } } diff --git a/llvm/unittests/ADT/TypeSwitchTest.cpp b/llvm/unittests/ADT/TypeSwitchTest.cpp index a7d934265c5f..b80122837c1a 100644 --- a/llvm/unittests/ADT/TypeSwitchTest.cpp +++ b/llvm/unittests/ADT/TypeSwitchTest.cpp @@ -142,3 +142,44 @@ TEST(TypeSwitchTest, DefaultUnreachableWithVoid) { EXPECT_DEATH((void)translate(DerivedD()), "Unhandled type"); #endif } + +TEST(TypeSwitchTest, DefaultNullopt) { + auto translate = [](auto value) { + return TypeSwitch<Base *, std::optional<int>>(&value) + .Case([](DerivedA *) { return 0; }) + .Default(std::nullopt); + }; + EXPECT_EQ(0, translate(DerivedA())); + EXPECT_EQ(std::nullopt, translate(DerivedD())); +} + +TEST(TypeSwitchTest, DefaultNullptr) { + float foo = 0.0f; + auto translate = [&](auto value) { + return TypeSwitch<Base *, float *>(&value) + .Case([&](DerivedA *) { return &foo; }) + .Default(nullptr); + }; + EXPECT_EQ(&foo, translate(DerivedA())); + EXPECT_EQ(nullptr, translate(DerivedD())); +} + +TEST(TypeSwitchTest, DefaultNullptrForPointerLike) { + struct Value { + void *ptr; + Value(const Value &other) : ptr(other.ptr) {} + Value(std::nullptr_t) : ptr(nullptr) {} + Value() : Value(nullptr) {} + }; + + float foo = 0.0f; + Value fooVal; + fooVal.ptr = &foo; + auto translate = [&](auto value) { + return TypeSwitch<Base *, Value>(&value) + .Case([&](DerivedA *) { return fooVal; }) + .Default(nullptr); + }; + EXPECT_EQ(&foo, translate(DerivedA()).ptr); + EXPECT_EQ(nullptr, translate(DerivedD()).ptr); +} diff --git a/llvm/unittests/Object/ELFObjectFileTest.cpp b/llvm/unittests/Object/ELFObjectFileTest.cpp index d6a3ca53b215..1e2955ae40a6 100644 --- a/llvm/unittests/Object/ELFObjectFileTest.cpp +++ b/llvm/unittests/Object/ELFObjectFileTest.cpp @@ -531,7 +531,7 @@ Sections: // Check that we can detect unsupported versions. SmallString<128> UnsupportedVersionYamlString(CommonYamlString); UnsupportedVersionYamlString += R"( - - Version: 5 + - Version: 6 BBRanges: - BaseAddress: 0x11111 BBEntries: @@ -543,7 +543,7 @@ Sections: { SCOPED_TRACE("unsupported version"); DoCheck(UnsupportedVersionYamlString, - "unsupported SHT_LLVM_BB_ADDR_MAP version: 5"); + "unsupported SHT_LLVM_BB_ADDR_MAP version: 6"); } SmallString<128> ZeroBBRangesYamlString(CommonYamlString); @@ -1181,8 +1181,8 @@ Sections: Type: SHT_LLVM_BB_ADDR_MAP # Link: 0 (by default, can be overriden) Entries: - - Version: 2 - Feature: 0x7 + - Version: 5 + Feature: 0x87 BBRanges: - BaseAddress: 0x44444 BBEntries: @@ -1205,7 +1205,8 @@ Sections: PGOAnalyses: - FuncEntryCount: 1000 PGOBBEntries: - - BBFreq: 1000 + - BBFreq: 1000 + PostLinkBBFreq: 50 Successors: - ID: 1 BrProb: 0x22222222 @@ -1243,8 +1244,8 @@ Sections: Type: SHT_LLVM_BB_ADDR_MAP # Link: 0 (by default, can be overriden) Entries: - - Version: 2 - Feature: 0xc + - Version: 5 + Feature: 0x8c BBRanges: - BaseAddress: 0x66666 BBEntries: @@ -1265,8 +1266,9 @@ Sections: PGOAnalyses: - PGOBBEntries: - Successors: - - ID: 1 - BrProb: 0x22222222 + - ID: 1 + BrProb: 0x22222222 + PostLinkBrFreq: 7 - ID: 2 BrProb: 0xcccccccc - Successors: @@ -1278,59 +1280,66 @@ Sections: BBAddrMap E1 = { {{0x11111, {{1, 0x0, 0x1, {false, true, false, false, false}, {}, 0}}}}}; PGOAnalysisMap P1 = { - 892, {}, {true, false, false, false, false, false, false}}; + 892, {}, {true, false, false, false, false, false, false, false}}; BBAddrMap E2 = { {{0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}, {}, 0}}}}}; PGOAnalysisMap P2 = {{}, - {{BlockFrequency(343), {}}}, - {false, true, false, false, false, false, false}}; + {{BlockFrequency(343), 0, {}}}, + {false, true, false, false, false, false, false, false}}; BBAddrMap E3 = { {{0x33333, {{0, 0x0, 0x3, {false, true, true, false, false}, {}, 0}, {1, 0x3, 0x3, {false, false, true, false, false}, {}, 0}, {2, 0x6, 0x3, {false, false, false, false, false}, {}, 0}}}}}; - PGOAnalysisMap P3 = {{}, - {{{}, - {{1, BranchProbability::getRaw(0x1111'1111)}, - {2, BranchProbability::getRaw(0xeeee'eeee)}}}, - {{}, {{2, BranchProbability::getRaw(0xffff'ffff)}}}, - {{}, {}}}, - {false, false, true, false, false, false, false}}; + PGOAnalysisMap P3 = { + {}, + {{{}, + 0, + {{1, BranchProbability::getRaw(0x1111'1111), 0}, + {2, BranchProbability::getRaw(0xeeee'eeee), 0}}}, + {{}, 0, {{2, BranchProbability::getRaw(0xffff'ffff), 0}}}, + {{}, 0, {}}}, + {false, false, true, false, false, false, false, false}}; BBAddrMap E4 = { {{0x44444, {{0, 0x0, 0x4, {false, false, false, true, true}, {}, 0}, {1, 0x4, 0x4, {false, false, false, false, false}, {}, 0}, {2, 0x8, 0x4, {false, false, false, false, false}, {}, 0}, {3, 0xc, 0x4, {false, false, false, false, false}, {}, 0}}}}}; - PGOAnalysisMap P4 = { - 1000, - {{BlockFrequency(1000), - {{1, BranchProbability::getRaw(0x2222'2222)}, - {2, BranchProbability::getRaw(0x3333'3333)}, - {3, BranchProbability::getRaw(0xaaaa'aaaa)}}}, - {BlockFrequency(133), - {{2, BranchProbability::getRaw(0x1111'1111)}, - {3, BranchProbability::getRaw(0xeeee'eeee)}}}, - {BlockFrequency(18), {{3, BranchProbability::getRaw(0xffff'ffff)}}}, - {BlockFrequency(1000), {}}}, - {true, true, true, false, false, false, false}}; + PGOAnalysisMap P4 = {1000, + {{BlockFrequency(1000), + 50, + {{1, BranchProbability::getRaw(0x2222'2222), 0}, + {2, BranchProbability::getRaw(0x3333'3333), 0}, + {3, BranchProbability::getRaw(0xaaaa'aaaa), 0}}}, + {BlockFrequency(133), + 0, + {{2, BranchProbability::getRaw(0x1111'1111), 0}, + {3, BranchProbability::getRaw(0xeeee'eeee), 0}}}, + {BlockFrequency(18), + 0, + {{3, BranchProbability::getRaw(0xffff'ffff), 0}}}, + {BlockFrequency(1000), 0, {}}}, + {true, true, true, false, false, false, false, true}}; BBAddrMap E5 = { {{0x55555, {{2, 0x0, 0x2, {false, false, true, false, false}, {}, 0}}}}}; PGOAnalysisMap P5 = { - {}, {}, {false, false, false, false, false, false, false}}; + {}, {}, {false, false, false, false, false, false, false, false}}; BBAddrMap E6 = { {{0x66666, {{0, 0x0, 0x6, {false, true, true, false, false}, {}, 0}, {1, 0x6, 0x6, {false, false, true, false, false}, {}, 0}}}, {0x666661, {{2, 0x0, 0x6, {false, false, false, false, false}, {}, 0}}}}}; - PGOAnalysisMap P6 = {{}, - {{{}, - {{1, BranchProbability::getRaw(0x2222'2222)}, - {2, BranchProbability::getRaw(0xcccc'cccc)}}}, - {{}, {{2, BranchProbability::getRaw(0x8888'8888)}}}, - {{}, {}}}, - {false, false, true, true, false, false, false}}; + PGOAnalysisMap P6 = { + {}, + {{{}, + 0, + {{1, BranchProbability::getRaw(0x2222'2222), 7}, + {2, BranchProbability::getRaw(0xcccc'cccc), 0}}}, + {{}, 0, {{2, BranchProbability::getRaw(0x8888'8888), 0}}}, + {{}, 0, {}}}, + {false, false, true, true, false, false, false, true}}; std::vector<BBAddrMap> Section0BBAddrMaps = {E4, E5, E6}; std::vector<BBAddrMap> Section1BBAddrMaps = {E3}; @@ -1465,7 +1474,7 @@ Sections: DoCheckFails( TruncatedYamlString, /*TextSectionIndex=*/std::nullopt, "unable to read SHT_LLVM_BB_ADDR_MAP section with index 6: " - "unexpected end of data at offset 0xa while reading [0x3, 0xb)"); + "unexpected end of data at offset 0xa while reading [0x4, 0xc)"); // Check that we can read the other section's bb-address-maps which are // valid. DoCheckSucceeds(TruncatedYamlString, /*TextSectionIndex=*/2, diff --git a/llvm/unittests/Object/ELFTypesTest.cpp b/llvm/unittests/Object/ELFTypesTest.cpp index 1765e1500396..9e99b4a6d7bf 100644 --- a/llvm/unittests/Object/ELFTypesTest.cpp +++ b/llvm/unittests/Object/ELFTypesTest.cpp @@ -101,22 +101,24 @@ static_assert( "PGOAnalysisMap should use the same type for basic block ID as BBAddrMap"); TEST(ELFTypesTest, BBAddrMapFeaturesEncodingTest) { - const std::array<BBAddrMap::Features, 12> Decoded = { - {{false, false, false, false, false, false, false}, - {true, false, false, false, false, false, false}, - {false, true, false, false, false, false, false}, - {false, false, true, false, false, false, false}, - {false, false, false, true, false, false, false}, - {true, true, false, false, false, false, false}, - {false, true, true, false, false, false, false}, - {false, true, true, true, false, false, false}, - {true, true, true, true, false, false, false}, - {false, false, false, false, true, false, false}, - {false, false, false, false, false, true, false}, - {false, false, false, false, false, false, true}}}; - const std::array<uint8_t, 12> Encoded = { + const std::array<BBAddrMap::Features, 14> Decoded = { + {{false, false, false, false, false, false, false, false}, + {true, false, false, false, false, false, false, false}, + {false, true, false, false, false, false, false, false}, + {false, false, true, false, false, false, false, false}, + {false, false, false, true, false, false, false, false}, + {true, true, false, false, false, false, false, false}, + {false, true, true, false, false, false, false, false}, + {false, true, true, true, false, false, false, false}, + {true, true, true, true, false, false, false, false}, + {false, false, false, false, true, false, false, false}, + {false, false, false, false, false, true, false, false}, + {false, false, false, false, false, false, true, false}, + {false, false, false, false, false, false, false, true}, + {false, false, false, false, false, false, true, true}}}; + const std::array<uint16_t, 14> Encoded = { {0b0000, 0b0001, 0b0010, 0b0100, 0b1000, 0b0011, 0b0110, 0b1110, 0b1111, - 0b1'0000, 0b10'0000, 0b100'0000}}; + 0b1'0000, 0b10'0000, 0b100'0000, 0b1000'0000, 0b1100'0000}}; for (const auto &[Feat, EncodedVal] : llvm::zip(Decoded, Encoded)) EXPECT_EQ(Feat.encode(), EncodedVal); for (const auto &[Feat, EncodedVal] : llvm::zip(Decoded, Encoded)) { @@ -129,9 +131,9 @@ TEST(ELFTypesTest, BBAddrMapFeaturesEncodingTest) { TEST(ELFTypesTest, BBAddrMapFeaturesInvalidEncodingTest) { const std::array<std::string, 2> Errors = { - "invalid encoding for BBAddrMap::Features: 0x80", - "invalid encoding for BBAddrMap::Features: 0xf0"}; - const std::array<uint8_t, 2> Values = {{0b1000'0000, 0b1111'0000}}; + "invalid encoding for BBAddrMap::Features: 0x100", + "invalid encoding for BBAddrMap::Features: 0x1000"}; + const std::array<uint16_t, 2> Values = {{0b1'0000'0000, 0b1'0000'0000'0000}}; for (const auto &[Val, Error] : llvm::zip(Values, Errors)) { EXPECT_THAT_ERROR(BBAddrMap::Features::decode(Val).takeError(), FailedWithMessage(Error)); diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py index dff7f78ce64a..f6b28f480b8a 100755 --- a/llvm/utils/git/code-format-helper.py +++ b/llvm/utils/git/code-format-helper.py @@ -486,8 +486,6 @@ def hook_main(): if fmt.has_tool(): if not fmt.run(args.changed_files, args): failed_fmts.append(fmt.name) - if fmt.comment: - comments.append(fmt.comment) else: print(f"Couldn't find {fmt.name}, can't check " + fmt.friendly_name.lower()) diff --git a/llvm/utils/lit/tests/Inputs/shtest-readfile/env.txt b/llvm/utils/lit/tests/Inputs/shtest-readfile/env.txt index c790b687c436..3e1937375497 100644 --- a/llvm/utils/lit/tests/Inputs/shtest-readfile/env.txt +++ b/llvm/utils/lit/tests/Inputs/shtest-readfile/env.txt @@ -1,6 +1,6 @@ ## Tests that readfile works with the env builtin. # RUN: echo -n "hello" > %t.1 -# RUN: env TEST=%{readfile:%t.1} python3 -c "import os; print(os.environ['TEST'])" +# RUN: env TEST=%{readfile:%t.1} %{python} -c "import os; print(os.environ['TEST'])" ## Fail the test so we can assert on the output. # RUN: not echo return
\ No newline at end of file diff --git a/llvm/utils/lit/tests/Inputs/shtest-readfile/lit.cfg b/llvm/utils/lit/tests/Inputs/shtest-readfile/lit.cfg index ee496674fdb6..80af27f57d35 100644 --- a/llvm/utils/lit/tests/Inputs/shtest-readfile/lit.cfg +++ b/llvm/utils/lit/tests/Inputs/shtest-readfile/lit.cfg @@ -10,6 +10,7 @@ use_lit_shell = lit.util.pythonize_bool(lit_shell_env) config.test_format = lit.formats.ShTest(execute_external=not use_lit_shell) config.test_source_root = None config.test_exec_root = None +config.substitutions.append(("%{python}", '"%s"' % (sys.executable))) # If we are testing with the external shell, remove the fake-externals from # PATH so that we use mkdir in the tests. diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_unlimited.txt b/llvm/utils/lit/tests/Inputs/shtest-ulimit-nondarwin/ulimit_unlimited.txt index b8aa3d507171..4c687e306186 100644 --- a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_unlimited.txt +++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit-nondarwin/ulimit_unlimited.txt @@ -1,6 +1,6 @@ # RUN: ulimit -f 5 -# RUN: %{python} %S/print_limits.py +# RUN: %{python} %S/../shtest-ulimit/print_limits.py # RUN: ulimit -f unlimited -# RUN: %{python} %S/print_limits.py +# RUN: %{python} %S/../shtest-ulimit/print_limits.py # Fail the test so that we can assert on the output. # RUN: not echo return diff --git a/llvm/utils/lit/tests/shtest-readfile.py b/llvm/utils/lit/tests/shtest-readfile.py index be3915a1608b..218da2257bcf 100644 --- a/llvm/utils/lit/tests/shtest-readfile.py +++ b/llvm/utils/lit/tests/shtest-readfile.py @@ -12,7 +12,7 @@ # CHECK: # executed command: echo '%{readfile:[[TEMP_PATH]]{{[\\\/]}}absolute-paths.txt.tmp}' # CHECK-LABEL: FAIL: shtest-readfile :: env.txt ({{[^)]*}}) -# CHECK: env TEST=hello python3 -c "import os; print(os.environ['TEST'])" +# CHECK: env TEST=hello {{.*}} -c "import os; print(os.environ['TEST'])" # CHECK: # | hello # CHECK-LABEL: FAIL: shtest-readfile :: file-does-not-exist.txt ({{[^)]*}}) diff --git a/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py b/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py index 022e8b5f4189..286fd3d7e173 100644 --- a/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py +++ b/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py @@ -2,14 +2,20 @@ # ulimit does not work on non-POSIX platforms. # These tests are specific to options that Darwin does not support. -# UNSUPPORTED: system-windows, system-darwin, system-aix +# UNSUPPORTED: system-windows, system-darwin, system-aix, system-solaris # RUN: not %{lit} -a -v %{inputs}/shtest-ulimit-nondarwin | FileCheck %s -# CHECK: -- Testing: 1 tests{{.*}} +# CHECK: -- Testing: 2 tests{{.*}} # CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_okay.txt ({{[^)]*}}) # CHECK: ulimit -v 1048576 # CHECK: ulimit -s 256 # CHECK: RLIMIT_AS=1073741824 # CHECK: RLIMIT_STACK=262144 + +# CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_unlimited.txt ({{[^)]*}}) +# CHECK: ulimit -f 5 +# CHECK: RLIMIT_FSIZE=5 +# CHECK: ulimit -f unlimited +# CHECK: RLIMIT_FSIZE=-1 diff --git a/llvm/utils/lit/tests/shtest-ulimit.py b/llvm/utils/lit/tests/shtest-ulimit.py index e15e19092030..21e5a5e2491d 100644 --- a/llvm/utils/lit/tests/shtest-ulimit.py +++ b/llvm/utils/lit/tests/shtest-ulimit.py @@ -11,7 +11,7 @@ # RUN: not %{lit} -a -v %{inputs}/shtest-ulimit --order=lexical \ # RUN: | FileCheck -DBASE_NOFILE_LIMIT=%{readfile:%t.nofile_limit} %s -# CHECK: -- Testing: 4 tests{{.*}} +# CHECK: -- Testing: 3 tests{{.*}} # CHECK-LABEL: FAIL: shtest-ulimit :: ulimit-bad-arg.txt ({{[^)]*}}) # CHECK: ulimit -n @@ -25,9 +25,3 @@ # CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_reset.txt ({{[^)]*}}) # CHECK: RLIMIT_NOFILE=[[BASE_NOFILE_LIMIT]] - -# CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_unlimited.txt ({{[^)]*}}) -# CHECK: ulimit -f 5 -# CHECK: RLIMIT_FSIZE=5 -# CHECK: ulimit -f unlimited -# CHECK: RLIMIT_FSIZE=-1 diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td index f3674c3eecfe..ecd036d452b2 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -293,10 +293,6 @@ def MapOp : LinalgStructuredBase_Op<"map", [ // Implement functions necessary for DestinationStyleOpInterface. MutableOperandRange getDpsInitsMutable() { return getInitMutable(); } - SmallVector<OpOperand *> getOpOperandsMatchingBBargs() { - return getDpsInputOperands(); - } - bool payloadUsesValueFromOperand(OpOperand * opOperand) { if (isDpsInit(opOperand)) return false; return !getMatchingBlockArgument(opOperand).use_empty(); diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td index 93e9e3d0689f..d1bbc7f206ce 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td @@ -261,6 +261,18 @@ def OpenACC_MappableTypeInterface : TypeInterface<"MappableType"> { >, InterfaceMethod< /*description=*/[{ + Returns true if the dimensions of this type are not known. This occurs + when the MLIR type does not encode dimensional information and there is + no associated descriptor or metadata in the current entity that would + make this information extractable. For example, an opaque pointer type + pointing to an array without dimension information would have unknown + dimensions. + }], + /*retTy=*/"bool", + /*methodName=*/"hasUnknownDimensions" + >, + InterfaceMethod< + /*description=*/[{ Returns explicit `acc.bounds` operations that envelop the whole data structure. These operations are inserted using the provided builder at the location set before calling this API. diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp index 41e333c621ed..3a307a0756d9 100644 --- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp +++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp @@ -935,7 +935,7 @@ static std::optional<uint32_t> mfmaTypeSelectCode(Type mlirElemType) { .Case([](Float6E2M3FNType) { return 2u; }) .Case([](Float6E3M2FNType) { return 3u; }) .Case([](Float4E2M1FNType) { return 4u; }) - .Default([](Type) { return std::nullopt; }); + .Default(std::nullopt); } /// If there is a scaled MFMA instruction for the input element types `aType` diff --git a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp index 247dba101cfc..cfdcd9cc2d86 100644 --- a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp +++ b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp @@ -432,7 +432,7 @@ static Value getOriginalVectorValue(Value value) { current = op.getSource(); return false; }) - .Default([](Operation *) { return false; }); + .Default(false); if (!skipOp) { break; diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp index 25f1e1b184d6..425594b3382f 100644 --- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp +++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp @@ -259,7 +259,7 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> { } return std::nullopt; }) - .Default([](auto) { return std::nullopt; }); + .Default(std::nullopt); } static std::optional<std::string> getFuncName(gpu::ShuffleMode mode, diff --git a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp index e2c7d803e5a5..91c1aa55fdb4 100644 --- a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp +++ b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp @@ -46,7 +46,7 @@ static bool isZeroConstant(Value val) { [](auto floatAttr) { return floatAttr.getValue().isZero(); }) .Case<IntegerAttr>( [](auto intAttr) { return intAttr.getValue().isZero(); }) - .Default([](auto) { return false; }); + .Default(false); } static LogicalResult storeLoadPreconditions(PatternRewriter &rewriter, diff --git a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp index 898d76ce8d9b..980442efdf70 100644 --- a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp +++ b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp @@ -2751,7 +2751,7 @@ std::optional<TypedAttr> mlir::arith::getNeutralElement(Operation *op) { .Case([](arith::MaxSIOp op) { return AtomicRMWKind::maxs; }) .Case([](arith::MinSIOp op) { return AtomicRMWKind::mins; }) .Case([](arith::MulIOp op) { return AtomicRMWKind::muli; }) - .Default([](Operation *op) { return std::nullopt; }); + .Default(std::nullopt); if (!maybeKind) { return std::nullopt; } diff --git a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp index d9d69342e42a..8655ed3005a9 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp @@ -95,12 +95,7 @@ getBufferizedFunctionArgType(FuncOp funcOp, int64_t index, /// Return the FuncOp called by `callOp`. static FuncOp getCalledFunction(CallOpInterface callOp, SymbolTableCollection &symbolTables) { - SymbolRefAttr sym = - llvm::dyn_cast_if_present<SymbolRefAttr>(callOp.getCallableForCallee()); - if (!sym) - return nullptr; - return dyn_cast_or_null<FuncOp>( - symbolTables.lookupNearestSymbolFrom(callOp, sym)); + return dyn_cast_or_null<FuncOp>(callOp.resolveCallableInTable(&symbolTables)); } /// Return the FuncOp called by `callOp`. diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp index aa53f94fe839..c233e24c2a15 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp @@ -285,12 +285,8 @@ static void removeBufferizationAttributes(BlockArgument bbArg) { static func::FuncOp getCalledFunction(func::CallOp callOp, mlir::SymbolTableCollection &symbolTable) { - SymbolRefAttr sym = - llvm::dyn_cast_if_present<SymbolRefAttr>(callOp.getCallableForCallee()); - if (!sym) - return nullptr; return dyn_cast_or_null<func::FuncOp>( - symbolTable.lookupNearestSymbolFrom(callOp, sym)); + callOp.resolveCallableInTable(&symbolTable)); } /// Return "true" if the given function signature has tensor semantics. diff --git a/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp b/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp index d2c2138d6163..025d1acf8d6b 100644 --- a/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp @@ -330,7 +330,7 @@ static Value getBase(Value v) { v = op.getSrc(); return true; }) - .Default([](Operation *) { return false; }); + .Default(false); if (!shouldContinue) break; } @@ -354,7 +354,7 @@ static Value propagatesCapture(Operation *op) { .Case([](memref::TransposeOp transpose) { return transpose.getIn(); }) .Case<memref::ExpandShapeOp, memref::CollapseShapeOp>( [](auto op) { return op.getSrc(); }) - .Default([](Operation *) { return Value(); }); + .Default(nullptr); } /// Returns `true` if the given operation is known to capture the given value, @@ -371,7 +371,7 @@ static std::optional<bool> getKnownCapturingStatus(Operation *op, Value v) { // These operations are known not to capture. .Case([](memref::DeallocOp) { return false; }) // By default, we don't know anything. - .Default([](Operation *) { return std::nullopt; }); + .Default(std::nullopt); } /// Returns `true` if the value may be captured by any of its users, i.e., if diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 3eae67f4c1f9..2731069d6ef5 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -698,7 +698,7 @@ static void destructureIndices(Type currType, ArrayRef<GEPArg> indices, return structType.getBody()[memberIndex]; return nullptr; }) - .Default(Type(nullptr)); + .Default(nullptr); } } diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp index cee943d2d86c..7d9058c26256 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp @@ -1111,7 +1111,7 @@ memsetCanUsesBeRemoved(MemsetIntr op, const MemorySlot &slot, .Case<IntegerType, FloatType>([](auto type) { return type.getWidth() % 8 == 0 && type.getWidth() > 0; }) - .Default([](Type) { return false; }); + .Default(false); if (!canConvertType) return false; diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp index ac35eea66e9d..ce93d18f56d3 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp @@ -798,7 +798,7 @@ static bool isCompatibleImpl(Type type, DenseSet<Type> &compatibleTypes) { // clang-format on .Case<PtrLikeTypeInterface>( [](Type type) { return isCompatiblePtrType(type); }) - .Default([](Type) { return false; }); + .Default(false); if (!result) compatibleTypes.erase(type); diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index cbc565b0c8cb..3dc45edf4a23 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -1474,6 +1474,8 @@ void MapOp::getAsmBlockArgumentNames(Region ®ion, OpAsmSetValueNameFn setNameFn) { for (Value v : getRegionInputArgs()) setNameFn(v, "in"); + for (Value v : getRegionOutputArgs()) + setNameFn(v, "init"); } void MapOp::getAsmResultNames(function_ref<void(Value, StringRef)> setNameFn) { @@ -1495,14 +1497,14 @@ void MapOp::build( if (bodyBuild) buildGenericRegion(builder, result.location, *result.regions.front(), - inputs, /*outputs=*/{}, bodyBuild); + inputs, /*outputs=*/{init}, bodyBuild); } static void addBodyWithPayloadOp(OpAsmParser &parser, OperationState &result, const OperationName &payloadOpName, const NamedAttrList &payloadOpAttrs, ArrayRef<Value> operands, - bool initFirst = false) { + bool initFirst = false, bool mapInit = true) { OpBuilder b(parser.getContext()); Region *body = result.addRegion(); Block &block = body->emplaceBlock(); @@ -1516,12 +1518,13 @@ static void addBodyWithPayloadOp(OpAsmParser &parser, OperationState &result, // If initFirst flag is enabled, we consider init as the first position of // payload operands. if (initFirst) { - payloadOpOperands.push_back(block.getArguments().back()); + if (mapInit) + payloadOpOperands.push_back(block.getArguments().back()); for (const auto &arg : block.getArguments().drop_back()) payloadOpOperands.push_back(arg); } else { payloadOpOperands = {block.getArguments().begin(), - block.getArguments().end()}; + block.getArguments().end() - int(!mapInit)}; } Operation *payloadOp = b.create( @@ -1553,8 +1556,8 @@ ParseResult MapOp::parse(OpAsmParser &parser, OperationState &result) { if (payloadOpName.has_value()) { if (!result.operands.empty()) addBodyWithPayloadOp(parser, result, payloadOpName.value(), - payloadOpAttrs, - ArrayRef(result.operands).drop_back()); + payloadOpAttrs, ArrayRef(result.operands), false, + false); else result.addRegion(); } else { @@ -1570,7 +1573,11 @@ ParseResult MapOp::parse(OpAsmParser &parser, OperationState &result) { return success(); } -static bool canUseShortForm(Block *body, bool initFirst = false) { +static bool canUseShortForm(Block *body, bool initFirst = false, + bool mapInit = true) { + // `intFirst == true` implies that we want to map init arg + if (initFirst && !mapInit) + return false; // Check if the body can be printed in short form. The following 4 conditions // must be satisfied: @@ -1582,7 +1589,7 @@ static bool canUseShortForm(Block *body, bool initFirst = false) { // 2) The payload op must have the same number of operands as the number of // block arguments. if (payload.getNumOperands() == 0 || - payload.getNumOperands() != body->getNumArguments()) + payload.getNumOperands() != body->getNumArguments() - int(!mapInit)) return false; // 3) If `initFirst` is true (e.g., for reduction ops), the init block @@ -1600,7 +1607,8 @@ static bool canUseShortForm(Block *body, bool initFirst = false) { } } else { for (const auto &[operand, bbArg] : - llvm::zip(payload.getOperands(), body->getArguments())) { + llvm::zip(payload.getOperands(), + body->getArguments().drop_back(int(!mapInit)))) { if (bbArg != operand) return false; } @@ -1632,7 +1640,8 @@ static void printShortForm(OpAsmPrinter &p, Operation *payloadOp) { void MapOp::print(OpAsmPrinter &p) { Block *mapper = getBody(); - bool useShortForm = canUseShortForm(mapper); + bool useShortForm = + canUseShortForm(mapper, /*initFirst=*/false, /*mapInit*/ false); if (useShortForm) { printShortForm(p, &mapper->getOperations().front()); } @@ -1658,11 +1667,13 @@ LogicalResult MapOp::verify() { auto *bodyBlock = getBody(); auto blockArgs = bodyBlock->getArguments(); - // Checks if the number of `inputs` match the arity of the `mapper` region. - if (getInputs().size() != blockArgs.size()) + // Checks if the number of `inputs` + `init` match the arity of the `mapper` + // region. + if (getInputs().size() + 1 != blockArgs.size()) return emitOpError() << "expects number of operands to match the arity of " "mapper, but got: " - << getInputs().size() << " and " << blockArgs.size(); + << getInputs().size() + 1 << " and " + << blockArgs.size(); // The parameters of mapper should all match the element type of inputs. for (const auto &[bbArgType, inputArg] : diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 8b8924448633..b09112bcf0bb 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -4499,7 +4499,7 @@ DiagnosedSilenceableFailure transform::DecomposeWinogradOp::applyToOne( maybeTransformed = decomposeWinogradOutputTransformOp(rewriter, op); return true; }) - .Default([&](Operation *op) { return false; }); + .Default(false); if (!supported) { DiagnosedSilenceableFailure diag = diff --git a/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp index 3e31393fd51e..75bb1757a55f 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp @@ -31,10 +31,8 @@ using namespace mlir; using namespace mlir::linalg; static LogicalResult generalizeNamedOpPrecondition(LinalgOp linalgOp) { - // Bailout if `linalgOp` is already a generic or a linalg.map. We cannot - // trivially generalize a `linalg.map`, as it does not use the output as - // region arguments in the block. - if (isa<GenericOp>(linalgOp) || isa<MapOp>(linalgOp)) + // Bailout if `linalgOp` is already a generic. + if (isa<GenericOp>(linalgOp)) return failure(); // Check if the operation has exactly one region. if (linalgOp->getNumRegions() != 1) { diff --git a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp index f05ffa8334d9..6519c4f64dd0 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp @@ -322,7 +322,7 @@ promoteSubViews(ImplicitLocOpBuilder &b, tmp = arith::ConstantOp::create(b, IntegerAttr::get(et, 0)); return complex::CreateOp::create(b, t, tmp, tmp); }) - .Default([](auto) { return Value(); }); + .Default(nullptr); if (!fillVal) return failure(); linalg::FillOp::create(b, fillVal, promotionInfo->fullLocalView); diff --git a/mlir/lib/Dialect/Linalg/Transforms/SimplifyDepthwiseConv.cpp b/mlir/lib/Dialect/Linalg/Transforms/SimplifyDepthwiseConv.cpp index 27ccf3c2ba14..6becc1f29afb 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/SimplifyDepthwiseConv.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/SimplifyDepthwiseConv.cpp @@ -89,7 +89,7 @@ matchAndReplaceDepthwiseConv(Operation *operation, Value input, Value kernel, ValueRange{input, collapsedKernel, iZp, kZp}, ValueRange{collapsedInit}, stride, dilation); }) - .Default([](Operation *op) { return nullptr; }); + .Default(nullptr); if (!newConv) return failure(); for (auto attr : preservedAttrs) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index 0f317eac8fa4..cb6199f026e0 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -656,7 +656,7 @@ mlir::linalg::getCombinerOpKind(Operation *combinerOp) { [&](auto op) { return CombiningKind::MUL; }) .Case<arith::OrIOp>([&](auto op) { return CombiningKind::OR; }) .Case<arith::XOrIOp>([&](auto op) { return CombiningKind::XOR; }) - .Default([&](auto op) { return std::nullopt; }); + .Default(std::nullopt); } /// Check whether `outputOperand` is a reduction with a single combiner diff --git a/mlir/lib/Dialect/MemRef/Transforms/FlattenMemRefs.cpp b/mlir/lib/Dialect/MemRef/Transforms/FlattenMemRefs.cpp index 1208fddf37e0..e6850890bf8f 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/FlattenMemRefs.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/FlattenMemRefs.cpp @@ -104,7 +104,7 @@ static Value getTargetMemref(Operation *op) { vector::MaskedStoreOp, vector::TransferReadOp, vector::TransferWriteOp>( [](auto op) { return op.getBase(); }) - .Default([](auto) { return Value{}; }); + .Default(nullptr); } template <typename T> diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopCanonicalization.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopCanonicalization.cpp index 4ebd90dbcc1d..d380c46f7fbe 100644 --- a/mlir/lib/Dialect/SCF/Transforms/LoopCanonicalization.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/LoopCanonicalization.cpp @@ -55,7 +55,7 @@ static bool isShapePreserving(ForOp forOp, int64_t arg) { ? forOp.getInitArgs()[opResult.getResultNumber()] : Value(); }) - .Default([&](auto op) { return Value(); }); + .Default(nullptr); } return false; } diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp index 0c8114d5e957..938952ed273c 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp @@ -346,7 +346,7 @@ LogicalResult spirv::CompositeConstructOp::verify() { llvm::TypeSwitch<Type, Type>(getType()) .Case<spirv::CooperativeMatrixType>( [](auto coopType) { return coopType.getElementType(); }) - .Default([](Type) { return nullptr; }); + .Default(nullptr); // Case 1. -- matrices. if (coopElementType) { @@ -1708,7 +1708,7 @@ LogicalResult spirv::MatrixTimesScalarOp::verify() { llvm::TypeSwitch<Type, Type>(getMatrix().getType()) .Case<spirv::CooperativeMatrixType, spirv::MatrixType>( [](auto matrixType) { return matrixType.getElementType(); }) - .Default([](Type) { return nullptr; }); + .Default(nullptr); assert(elementType && "Unhandled type"); diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp index f895807ea1d1..d1e275d590f7 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp @@ -731,7 +731,7 @@ std::optional<int64_t> SPIRVType::getSizeInBytes() { return *elementSize * type.getNumElements(); return std::nullopt; }) - .Default(std::optional<int64_t>()); + .Default(std::nullopt); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp index 88e1ab6ab1e4..cb9b7f6ec2fd 100644 --- a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp +++ b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp @@ -1467,7 +1467,7 @@ mlir::spirv::getNativeVectorShape(Operation *op) { return TypeSwitch<Operation *, std::optional<SmallVector<int64_t>>>(op) .Case<vector::ReductionOp, vector::TransposeOp>( [](auto typedOp) { return getNativeVectorShapeImpl(typedOp); }) - .Default([](Operation *) { return std::nullopt; }); + .Default(std::nullopt); } LogicalResult mlir::spirv::unrollVectorsInSignatures(Operation *op) { diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp index bce964e47a3b..c607ece418df 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp @@ -579,6 +579,7 @@ static Value lowerGenerateLikeOpBody(RewriterBase &rewriter, Location loc, linalg::MapOp::create(rewriter, loc, tensorType, /*inputs=*/ValueRange(), /*init=*/tensorDestination); Block &linalgBody = linalgOp.getMapper().emplaceBlock(); + linalgBody.addArgument(tensorType.getElementType(), loc); // Create linalg::IndexOps. rewriter.setInsertionPointToStart(&linalgBody); @@ -1068,6 +1069,7 @@ struct SplatOpInterface /*inputs=*/ValueRange(), /*init=*/*tensorAlloc); Block &linalgBody = linalgOp.getMapper().emplaceBlock(); + linalgBody.addArgument(tensorType.getElementType(), loc); // Create linalg::IndexOps. rewriter.setInsertionPointToStart(&linalgBody); diff --git a/mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp b/mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp index 69e649d2eebe..bc4f5a5ac7f2 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp @@ -189,7 +189,7 @@ struct PadOpToConstant final : public OpRewritePattern<PadOp> { return constantFoldPadOp<llvm::APInt>( rewriter, loc, inputAttr, integerAttr, *lowPad, *highPad); }) - .Default(Value()); + .Default(nullptr); if (!newOp) return rewriter.notifyMatchFailure(padTensorOp, diff --git a/mlir/lib/TableGen/Type.cpp b/mlir/lib/TableGen/Type.cpp index b31377e0de3e..0f1bf83d1987 100644 --- a/mlir/lib/TableGen/Type.cpp +++ b/mlir/lib/TableGen/Type.cpp @@ -56,7 +56,7 @@ std::optional<StringRef> TypeConstraint::getBuilderCall() const { StringRef value = init->getValue(); return value.empty() ? std::optional<StringRef>() : value; }) - .Default([](auto *) { return std::nullopt; }); + .Default(std::nullopt); } // Return the C++ type for this type (which may just be ::mlir::Type). diff --git a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp index eeb87253e5eb..e3bcf2749be1 100644 --- a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp @@ -390,7 +390,7 @@ llvm::DISubrange *DebugTranslation::translateImpl(DISubrangeAttr attr) { .Case<>([&](LLVM::DIGlobalVariableAttr global) { return translate(global); }) - .Default([&](Attribute attr) { return nullptr; }); + .Default(nullptr); return metadata; }; return llvm::DISubrange::get(llvmCtx, getMetadataOrNull(attr.getCount()), @@ -420,10 +420,10 @@ DebugTranslation::translateImpl(DIGenericSubrangeAttr attr) { .Case([&](LLVM::DILocalVariableAttr local) { return translate(local); }) - .Case<>([&](LLVM::DIGlobalVariableAttr global) { + .Case([&](LLVM::DIGlobalVariableAttr global) { return translate(global); }) - .Default([&](Attribute attr) { return nullptr; }); + .Default(nullptr); return metadata; }; return llvm::DIGenericSubrange::get(llvmCtx, diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index f28454075f1d..8edec990eaab 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -4084,12 +4084,13 @@ static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo, /// /// Fortran /// map(tofrom: array(2:5, 3:2)) -/// or -/// C++ -/// map(tofrom: array[1:4][2:3]) +/// /// We must calculate the initial pointer offset to pass across, this function /// performs this using bounds. /// +/// TODO/WARNING: This only supports Fortran's column major indexing currently +/// as is noted in the note below and comments in the function, we must extend +/// this function when we add a C++ frontend. /// NOTE: which while specified in row-major order it currently needs to be /// flipped for Fortran's column order array allocation and access (as /// opposed to C++'s row-major, hence the backwards processing where order is @@ -4125,46 +4126,28 @@ calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation, // with a pointer that's being treated like an array and we have the // underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base // address (pointer pointing to the actual data) so we must caclulate the - // offset using a single index which the following two loops attempts to - // compute. - - // Calculates the size offset we need to make per row e.g. first row or - // column only needs to be offset by one, but the next would have to be - // the previous row/column offset multiplied by the extent of current row. + // offset using a single index which the following loop attempts to + // compute using the standard column-major algorithm e.g for a 3D array: // - // For example ([1][10][100]): + // ((((c_idx * b_len) + b_idx) * a_len) + a_idx) // - // - First row/column we move by 1 for each index increment - // - Second row/column we move by 1 (first row/column) * 10 (extent/size of - // current) for 10 for each index increment - // - Third row/column we would move by 10 (second row/column) * - // (extent/size of current) 100 for 1000 for each index increment - std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)}; - for (size_t i = 1; i < bounds.size(); ++i) { - if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>( - bounds[i].getDefiningOp())) { - dimensionIndexSizeOffset.push_back(builder.CreateMul( - moduleTranslation.lookupValue(boundOp.getExtent()), - dimensionIndexSizeOffset[i - 1])); - } - } - - // Now that we have calculated how much we move by per index, we must - // multiply each lower bound offset in indexes by the size offset we - // have calculated in the previous and accumulate the results to get - // our final resulting offset. + // It is of note that it's doing column-major rather than row-major at the + // moment, but having a way for the frontend to indicate which major format + // to use or standardizing/canonicalizing the order of the bounds to compute + // the offset may be useful in the future when there's other frontends with + // different formats. + std::vector<llvm::Value *> dimensionIndexSizeOffset; for (int i = bounds.size() - 1; i >= 0; --i) { if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>( bounds[i].getDefiningOp())) { - if (idx.empty()) - idx.emplace_back(builder.CreateMul( - moduleTranslation.lookupValue(boundOp.getLowerBound()), - dimensionIndexSizeOffset[i])); + if (i == ((int)bounds.size() - 1)) + idx.emplace_back( + moduleTranslation.lookupValue(boundOp.getLowerBound())); else idx.back() = builder.CreateAdd( - idx.back(), builder.CreateMul(moduleTranslation.lookupValue( - boundOp.getLowerBound()), - dimensionIndexSizeOffset[i])); + builder.CreateMul(idx.back(), moduleTranslation.lookupValue( + boundOp.getExtent())), + moduleTranslation.lookupValue(boundOp.getLowerBound())); } } } diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir index 26d2d98572f4..f4020ede4854 100644 --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -1423,7 +1423,7 @@ func.func @transpose_buffer(%input: memref<?xf32>, func.func @recursive_effect(%arg : tensor<1xf32>) { %init = arith.constant dense<0.0> : tensor<1xf32> %mapped = linalg.map ins(%arg:tensor<1xf32>) outs(%init :tensor<1xf32>) - (%in : f32) { + (%in : f32, %out: f32) { vector.print %in : f32 linalg.yield %in : f32 } diff --git a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir index ae07b1b82228..dcdd6c8db4b2 100644 --- a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir @@ -386,18 +386,24 @@ func.func @generalize_batch_reduce_gemm_bf16(%lhs: memref<7x8x9xbf16>, %rhs: mem // ----- -// CHECK-LABEL: generalize_linalg_map -func.func @generalize_linalg_map(%arg0: memref<1x8x8x8xf32>) { +func.func @generalize_linalg_map(%arg0: memref<1x8x8x8xf32>, %arg1: memref<1x8x8x8xf32>, %arg2: memref<1x8x8x8xf32>) { %cst = arith.constant 0.000000e+00 : f32 - // CHECK: linalg.map - // CHECK-NOT: linalg.generic - linalg.map outs(%arg0 : memref<1x8x8x8xf32>) - () { - linalg.yield %cst : f32 - } + linalg.map {arith.addf} ins(%arg0, %arg1: memref<1x8x8x8xf32>, memref<1x8x8x8xf32>) outs(%arg2 : memref<1x8x8x8xf32>) return } +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> + +// CHECK: @generalize_linalg_map + +// CHECK: linalg.generic +// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP0]], #[[MAP0]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]} +// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x8x8x8xf32>, memref<1x8x8x8xf32>) outs(%{{.+}} : memref<1x8x8x8xf32> +// CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) +// CHECK: %[[ADD:.+]] = arith.addf %[[BBARG0]], %[[BBARG1]] : f32 +// CHECK: linalg.yield %[[ADD]] : f32 + // ----- func.func @generalize_add(%lhs: memref<7x14x21xf32>, %rhs: memref<7x14x21xf32>, diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir index 40bf4d19d6b9..fabc8e610612 100644 --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ b/mlir/test/Dialect/Linalg/invalid.mlir @@ -681,7 +681,7 @@ func.func @map_binary_wrong_yield_operands( %add = linalg.map ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>) outs(%init:tensor<64xf32>) - (%lhs_elem: f32, %rhs_elem: f32) { + (%lhs_elem: f32, %rhs_elem: f32, %out: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f32 // expected-error @+1{{'linalg.yield' op expected number of yield values (2) to match the number of inits / outs operands of the enclosing LinalgOp (1)}} linalg.yield %0, %0: f32, f32 @@ -694,11 +694,11 @@ func.func @map_binary_wrong_yield_operands( func.func @map_input_mapper_arity_mismatch( %lhs: tensor<64xf32>, %rhs: tensor<64xf32>, %init: tensor<64xf32>) -> tensor<64xf32> { - // expected-error@+1{{'linalg.map' op expects number of operands to match the arity of mapper, but got: 2 and 3}} + // expected-error@+1{{'linalg.map' op expects number of operands to match the arity of mapper, but got: 3 and 4}} %add = linalg.map ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>) outs(%init:tensor<64xf32>) - (%lhs_elem: f32, %rhs_elem: f32, %extra_elem: f32) { + (%lhs_elem: f32, %rhs_elem: f32, %out: f32, %extra_elem: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f32 linalg.yield %0: f32 } @@ -714,7 +714,7 @@ func.func @map_input_mapper_type_mismatch( %add = linalg.map ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>) outs(%init:tensor<64xf32>) - (%lhs_elem: f64, %rhs_elem: f64) { + (%lhs_elem: f64, %rhs_elem: f64, %out: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f64 linalg.yield %0: f64 } @@ -730,7 +730,7 @@ func.func @map_input_output_shape_mismatch( %add = linalg.map ins(%lhs, %rhs : tensor<64x64xf32>, tensor<64x64xf32>) outs(%init:tensor<32xf32>) - (%lhs_elem: f32, %rhs_elem: f32) { + (%lhs_elem: f32, %rhs_elem: f32, %out: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f32 linalg.yield %0: f32 } diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir index 1df15e85bac1..85cc1ffc2029 100644 --- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir @@ -339,7 +339,7 @@ func.func @map_binary(%lhs: tensor<64xf32>, %rhs: tensor<64xf32>, %add = linalg.map ins(%lhs, %rhs: tensor<64xf32>, tensor<64xf32>) outs(%init:tensor<64xf32>) - (%lhs_elem: f32, %rhs_elem: f32) { + (%lhs_elem: f32, %rhs_elem: f32, %out: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f32 linalg.yield %0: f32 } diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir index 563013d4083a..74928920c695 100644 --- a/mlir/test/Dialect/Linalg/roundtrip.mlir +++ b/mlir/test/Dialect/Linalg/roundtrip.mlir @@ -341,7 +341,7 @@ func.func @mixed_parallel_reduced_results(%arg0 : tensor<?x?x?xf32>, func.func @map_no_inputs(%init: tensor<64xf32>) -> tensor<64xf32> { %add = linalg.map outs(%init:tensor<64xf32>) - () { + (%out: f32) { %0 = arith.constant 0.0: f32 linalg.yield %0: f32 } @@ -349,7 +349,7 @@ func.func @map_no_inputs(%init: tensor<64xf32>) -> tensor<64xf32> { } // CHECK-LABEL: func @map_no_inputs // CHECK: linalg.map outs -// CHECK-NEXT: () { +// CHECK-NEXT: (%[[OUT:.*]]: f32) { // CHECK-NEXT: arith.constant // CHECK-NEXT: linalg.yield // CHECK-NEXT: } @@ -361,7 +361,7 @@ func.func @map_binary(%lhs: tensor<64xf32>, %rhs: tensor<64xf32>, %add = linalg.map ins(%lhs, %rhs: tensor<64xf32>, tensor<64xf32>) outs(%init:tensor<64xf32>) - (%lhs_elem: f32, %rhs_elem: f32) { + (%lhs_elem: f32, %rhs_elem: f32, %out: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f32 linalg.yield %0: f32 } @@ -378,7 +378,7 @@ func.func @map_binary_memref(%lhs: memref<64xf32>, %rhs: memref<64xf32>, linalg.map ins(%lhs, %rhs: memref<64xf32>, memref<64xf32>) outs(%init:memref<64xf32>) - (%lhs_elem: f32, %rhs_elem: f32) { + (%lhs_elem: f32, %rhs_elem: f32, %out: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f32 linalg.yield %0: f32 } @@ -393,7 +393,7 @@ func.func @map_unary(%input: tensor<64xf32>, %init: tensor<64xf32>) -> tensor<64 %abs = linalg.map ins(%input:tensor<64xf32>) outs(%init:tensor<64xf32>) - (%input_elem: f32) { + (%input_elem: f32, %out: f32) { %0 = math.absf %input_elem: f32 linalg.yield %0: f32 } @@ -408,7 +408,7 @@ func.func @map_unary_memref(%input: memref<64xf32>, %init: memref<64xf32>) { linalg.map ins(%input:memref<64xf32>) outs(%init:memref<64xf32>) - (%input_elem: f32) { + (%input_elem: f32, %out: f32) { %0 = math.absf %input_elem: f32 linalg.yield %0: f32 } @@ -604,7 +604,7 @@ func.func @map_arith_with_attr(%lhs: tensor<64xf32>, %rhs: tensor<64xf32>, %add = linalg.map ins(%lhs, %rhs: tensor<64xf32>, tensor<64xf32>) outs(%init:tensor<64xf32>) - (%lhs_elem: f32, %rhs_elem: f32) { + (%lhs_elem: f32, %rhs_elem: f32, %out: f32) { %0 = arith.addf %lhs_elem, %rhs_elem fastmath<fast> : f32 linalg.yield %0: f32 } @@ -622,7 +622,7 @@ func.func @map_arith_with_attr(%lhs: tensor<64xf32>, %rhs: tensor<64xf32>, func.func @map_not_short_form_compatible(%lhs: tensor<1x32xf32>, %rhs: tensor<1x32xf32>, %init: tensor<1x32xf32>) -> tensor<1x32xf32> { %mapped = linalg.map ins(%lhs, %rhs : tensor<1x32xf32>, tensor<1x32xf32>) outs(%init : tensor<1x32xf32>) - (%in_1: f32, %in_2: f32) { + (%in_1: f32, %in_2: f32, %out: f32) { %1 = arith.maximumf %in_1, %in_2 : f32 linalg.yield %in_1 : f32 } @@ -634,7 +634,7 @@ func.func @map_not_short_form_compatible(%lhs: tensor<1x32xf32>, %rhs: tensor<1x // CHECK-NOT: linalg.map { arith.maximumf } ins(%[[LHS]] : tensor<1x32xf32> // CHECK: linalg.map ins(%[[LHS]], %[[RHS]] : tensor<1x32xf32>, tensor<1x32xf32>) // CHECK-SAME: outs(%[[INIT]] : tensor<1x32xf32>) -// CHECK-NEXT: (%[[IN1:.*]]: f32, %[[IN2:.*]]: f32) { +// CHECK-NEXT: (%[[IN1:.*]]: f32, %[[IN2:.*]]: f32, %[[OUT:.*]]: f32) { // CHECK-NEXT: %[[MAX_RESULT:.*]] = arith.maximumf %[[IN1]], %[[IN2]] : f32 // CHECK-NEXT: linalg.yield %[[IN1]] : f32 // CHECK-NEXT: } diff --git a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir index 93a03369be23..aa2c1da4b627 100644 --- a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir +++ b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir @@ -356,7 +356,7 @@ func.func @vectorize_map(%arg0: memref<64xf32>, %arg1: memref<64xf32>, %arg2: memref<64xf32>) { linalg.map ins(%arg0, %arg1 : memref<64xf32>, memref<64xf32>) outs(%arg2 : memref<64xf32>) - (%in: f32, %in_0: f32) { + (%in: f32, %in_0: f32, %out: f32) { %0 = arith.addf %in, %in_0 : f32 linalg.yield %0 : f32 } diff --git a/mlir/test/Dialect/Tensor/bufferize.mlir b/mlir/test/Dialect/Tensor/bufferize.mlir index 296ca02564e3..5eb2360a29b8 100644 --- a/mlir/test/Dialect/Tensor/bufferize.mlir +++ b/mlir/test/Dialect/Tensor/bufferize.mlir @@ -728,7 +728,7 @@ func.func @tensor.concat_dynamic_nonconcat_dim(%f: tensor<?x?xf32>, %g: tensor<? // CHECK-DAG: %[[ALLOC:.*]] = memref.alloc(%[[M]], %[[N]]) {{.*}} : memref<?x3x?xf32> // CHECK: %[[ALLOC_T:.*]] = bufferization.to_tensor %[[ALLOC]] // CHECK: %[[MAPPED:.*]] = linalg.map outs(%[[ALLOC_T]] : tensor<?x3x?xf32>) -// CHECK: () { +// CHECK: (%[[INIT:.*]]: f32) { // CHECK: linalg.yield %[[F]] : f32 // CHECK: } // CHECK: return %[[MAPPED]] : tensor<?x3x?xf32> diff --git a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir index 8cbee3cbb758..aa8882d21698 100644 --- a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir +++ b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir @@ -257,10 +257,10 @@ module attributes {transform.with_named_sequence} { // ----- func.func @map(%lhs: memref<64xf32>, - %rhs: memref<64xf32>, %out: memref<64xf32>) { + %rhs: memref<64xf32>, %init: memref<64xf32>) { linalg.map ins(%lhs, %rhs : memref<64xf32>, memref<64xf32>) - outs(%out : memref<64xf32>) - (%in: f32, %in_0: f32) { + outs(%init : memref<64xf32>) + (%in: f32, %in_0: f32, %out: f32) { %0 = arith.addf %in, %in_0 : f32 linalg.yield %0 : f32 } diff --git a/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir b/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir index a1e415c35e4b..9640f03311af 100644 --- a/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir @@ -81,9 +81,8 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a // CHECK: %[[ARR_SECT_SIZE:.*]] = mul i64 %[[ARR_SECT_SIZE1]], 4 // CHECK: %[[LFULL_ARR:.*]] = load ptr, ptr @full_arr, align 8 // CHECK: %[[FULL_ARR_PTR:.*]] = getelementptr inbounds float, ptr %[[LFULL_ARR]], i64 0 -// CHECK: %[[ARR_SECT_OFFSET1:.*]] = mul i64 %[[ARR_SECT_OFFSET2]], 1 // CHECK: %[[LARR_SECT:.*]] = load ptr, ptr @sect_arr, align 8 -// CHECK: %[[ARR_SECT_PTR:.*]] = getelementptr inbounds i32, ptr %[[LARR_SECT]], i64 %[[ARR_SECT_OFFSET1]] +// CHECK: %[[ARR_SECT_PTR:.*]] = getelementptr inbounds i32, ptr %[[LARR_SECT]], i64 %[[ARR_SECT_OFFSET2]] // CHECK: %[[SCALAR_PTR_LOAD:.*]] = load ptr, ptr %[[SCALAR_BASE]], align 8 // CHECK: %[[FULL_ARR_DESC_SIZE:.*]] = sdiv exact i64 48, ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) // CHECK: %[[FULL_ARR_SIZE_CMP:.*]] = icmp eq ptr %[[FULL_ARR_PTR]], null diff --git a/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp b/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp index 496f18bc49fa..61db9d2b4446 100644 --- a/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp +++ b/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp @@ -797,7 +797,7 @@ DiagnosedSilenceableFailure mlir::test::TestProduceInvalidIR::applyToOne( // Provide some IR that does not verify. rewriter.setInsertionPointToStart(&target->getRegion(0).front()); TestDummyPayloadOp::create(rewriter, target->getLoc(), TypeRange(), - ValueRange(), /*failToVerify=*/true); + ValueRange(), /*fail_to_verify=*/true); return DiagnosedSilenceableFailure::success(); } diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp index 6ac9a873e615..d6203b97e00d 100644 --- a/mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp +++ b/mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp @@ -766,7 +766,9 @@ void testShortDataEntryOpBuildersMappableVar(OpBuilder &b, MLIRContext &context, struct IntegerOpenACCMappableModel : public mlir::acc::MappableType::ExternalModel<IntegerOpenACCMappableModel, - IntegerType> {}; + IntegerType> { + bool hasUnknownDimensions(mlir::Type type) const { return false; } +}; TEST_F(OpenACCOpsTest, mappableTypeBuilderDataEntry) { // First, set up the test by attaching MappableInterface to IntegerType. diff --git a/offload/test/offloading/fortran/descriptor-array-slice-map.f90 b/offload/test/offloading/fortran/descriptor-array-slice-map.f90 new file mode 100644 index 000000000000..69abb320adc3 --- /dev/null +++ b/offload/test/offloading/fortran/descriptor-array-slice-map.f90 @@ -0,0 +1,61 @@ +! Offloading test which aims to test that an allocatable/descriptor type map +! will allow the appropriate slicing behaviour. +! REQUIRES: flang, amdgpu + +subroutine slice_writer(n, a, b, c) + implicit none + integer, intent(in) :: n + real(8), intent(in) :: a(n) + real(8), intent(in) :: b(n) + real(8), intent(out) :: c(n) + integer :: i + + !$omp target teams distribute parallel do + do i=1,n + c(i) = b(i) + a(i) + end do +end subroutine slice_writer + +! RUN: %libomptarget-compile-fortran-run-and-check-generic +program main + implicit none + real(kind=8), allocatable :: a(:,:,:) + integer :: i, j, k, idx, idx1, idx2, idx3 + + i=50 + j=100 + k=2 + + allocate(a(1:i,1:j,1:k)) + + do idx1=1, i + do idx2=1, j + do idx3=1, k + a(idx1,idx2,idx3) = idx2 + end do + end do + end do + + do idx=1,k + !$omp target enter data map(alloc: a(1:i,:, idx)) + + !$omp target update to(a(1:i, 1:30, idx), & + !$omp& a(1:i, 61:100, idx)) + + call slice_writer(i, a(:, 1, idx), a(:, 61, idx), a(:, 31, idx)) + call slice_writer(i, a(:, 30, idx), a(:, 100, idx), a(:, 60, idx)) + + !$omp target update from(a(1:i, 31:60, idx)) + !$omp target exit data map(delete: a(1:i, :, idx)) + + print *, a(1, 31, idx), a(2, 31, idx), a(i, 31, idx) + print *, a(1, 60, idx), a(2, 60, idx), a(i, 60, idx) + enddo + + deallocate(a) +end program + +! CHECK: 62. 62. 62. +! CHECK: 130. 130. 130. +! CHECK: 62. 62. 62. +! CHECK: 130. 130. 130. |
