summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAiden Grossman <aidengrossman@google.com>2025-10-30 19:28:46 +0000
committerAiden Grossman <aidengrossman@google.com>2025-10-30 19:28:46 +0000
commit3d44d2a83abba5810f00a3f17acd133f5a4fac56 (patch)
tree9e61f193869172bf0060f695f567abdac24b3117
parent8d3435a08bf58fb12d62be8d1551192b491e6195 (diff)
parent546e91bacf2686613908701397ecad0b47165384 (diff)
[𝘀𝗽𝗿] changes introduced through rebaseusers/boomanaiden154/main.nsan-make-tests-work-with-internal-shell
Created using spr 1.3.7 [skip ci]
-rw-r--r--clang/docs/Modules.rst17
-rw-r--r--clang/include/clang/Basic/Builtins.def1
-rw-r--r--clang/include/clang/Basic/BuiltinsAMDGPU.def41
-rw-r--r--clang/lib/AST/ASTContext.cpp5
-rw-r--r--clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp81
-rw-r--r--clang/lib/Lex/HeaderSearch.cpp4
-rw-r--r--clang/lib/Sema/SemaAMDGPU.cpp43
-rw-r--r--clang/test/CodeGen/builtins-extended-image.c1528
-rw-r--r--clang/test/SemaOpenCL/builtins-extended-image-param-gfx1100-err.cl227
-rw-r--r--clang/test/SemaOpenCL/builtins-extended-image-param-gfx942-err.cl227
-rw-r--r--compiler-rt/test/profile/Linux/instrprof-debug-info-correlate-warnings.c2
-rw-r--r--flang/include/flang/Semantics/dump-expr.h3
-rw-r--r--flang/lib/Parser/prescan.cpp2
-rw-r--r--flang/test/Parser/inline-directives.f9029
-rw-r--r--libc/CMakeLists.txt2
-rw-r--r--libc/config/linux/x86_64/exclude.txt8
-rw-r--r--libc/include/locale.yaml2
-rw-r--r--libc/include/stdio.yaml2
-rw-r--r--libc/include/stdlib.yaml2
-rw-r--r--libc/include/string.yaml2
-rw-r--r--libc/include/time.yaml2
-rw-r--r--libc/include/wchar.yaml8
-rw-r--r--libc/src/time/strftime.cpp2
-rw-r--r--libc/src/time/strftime_l.cpp2
-rw-r--r--libc/test/src/time/strftime_test.cpp20
-rw-r--r--libc/utils/hdrgen/hdrgen/enumeration.py16
-rw-r--r--libc/utils/hdrgen/hdrgen/function.py16
-rw-r--r--libc/utils/hdrgen/hdrgen/header.py81
-rw-r--r--libc/utils/hdrgen/hdrgen/macro.py16
-rwxr-xr-xlibc/utils/hdrgen/hdrgen/main.py1
-rw-r--r--libc/utils/hdrgen/hdrgen/object.py16
-rw-r--r--libc/utils/hdrgen/hdrgen/symbol.py41
-rw-r--r--libc/utils/hdrgen/hdrgen/type.py20
-rw-r--r--libc/utils/hdrgen/hdrgen/yaml_to_classes.py2
-rw-r--r--libc/utils/hdrgen/tests/expected_output/custom.h21
-rw-r--r--libc/utils/hdrgen/tests/expected_output/sorting.h24
-rw-r--r--libc/utils/hdrgen/tests/expected_output/test_header.h1
-rw-r--r--libc/utils/hdrgen/tests/expected_output/test_small.json1
-rw-r--r--libc/utils/hdrgen/tests/input/custom-common.yaml6
-rw-r--r--libc/utils/hdrgen/tests/input/custom.yaml13
-rw-r--r--libc/utils/hdrgen/tests/input/sorting.yaml20
-rw-r--r--libc/utils/hdrgen/tests/test_integration.py14
-rw-r--r--libcxx/test/libcxx/input.output/iostreams.base/ios.base/ios.base.cons/dtor.uninitialized.pass.cpp6
-rw-r--r--libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/setbuf.pass.cpp6
-rw-r--r--libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/sync.pass.cpp6
-rw-r--r--libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp16
-rw-r--r--libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp5
-rw-r--r--libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp3
-rw-r--r--libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp27
-rw-r--r--libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp5
-rw-r--r--libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp3
-rw-r--r--libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp43
-rw-r--r--libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/curr_symbol.pass.cpp15
-rw-r--r--libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/grouping.pass.cpp5
-rw-r--r--libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/neg_format.pass.cpp35
-rw-r--r--libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/pos_format.pass.cpp10
-rw-r--r--libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp6
-rw-r--r--libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp6
-rw-r--r--libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp6
-rw-r--r--libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/grouping.pass.cpp7
-rw-r--r--libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp5
-rw-r--r--libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp10
-rw-r--r--libcxx/test/std/time/time.syn/formatter.duration.pass.cpp51
-rw-r--r--libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp19
-rw-r--r--libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp35
-rw-r--r--libcxx/test/std/time/time.syn/formatter.local_time.pass.cpp19
-rw-r--r--libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp19
-rw-r--r--libcxx/test/support/locale_helpers.h12
-rw-r--r--libcxxabi/test/uncaught_exception.pass.cpp6
-rw-r--r--lld/ELF/SyntheticSections.cpp7
-rw-r--r--lld/ELF/SyntheticSections.h4
-rw-r--r--lld/ELF/Writer.cpp7
-rw-r--r--lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py206
-rw-r--r--lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py4
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp4
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h1
-rw-r--r--lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py7
-rw-r--r--lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/optional/TestDataFormatterLibcxxOptionalSimulator.py2
-rw-r--r--lldb/test/API/lang/cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py2
-rw-r--r--lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py30
-rw-r--r--lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py2
-rw-r--r--lldb/test/API/tools/lldb-dap/module-event/TestDAP_module_event.py88
-rw-r--r--lldb/test/API/tools/lldb-dap/module/TestDAP_module.py8
-rw-r--r--lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py24
-rw-r--r--lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py2
-rw-r--r--lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp90
-rw-r--r--llvm/docs/GettingInvolved.rst8
-rw-r--r--llvm/include/llvm/ADT/TypeSwitch.h17
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h2
-rw-r--r--llvm/lib/Analysis/DependenceAnalysis.cpp22
-rw-r--r--llvm/lib/Frontend/Driver/CodeGenOptions.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp20
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp15
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp14
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h3
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp10
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp5
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.h2
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp1
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp14
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoP.td5
-rw-r--r--llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp1
-rw-r--r--llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp20
-rw-r--r--llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp60
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp6
-rw-r--r--llvm/test/Analysis/DependenceAnalysis/GCD.ll6
-rw-r--r--llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll4
-rw-r--r--llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll2
-rw-r--r--llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll15
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll612
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll66
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir37
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir524
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir19
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir24
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir8
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir479
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir8
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll535
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll21
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll12
-rw-r--r--llvm/test/CodeGen/ARM/strict-fp-func.ll13
-rw-r--r--llvm/test/CodeGen/RISCV/rv64-stackmap.ll4
-rw-r--r--llvm/test/CodeGen/RISCV/rv64p.ll6
-rw-r--r--llvm/test/MC/AMDGPU/gfx12_asm_vop1.s4
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll89
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll11
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll157
-rw-r--r--llvm/unittests/ADT/TypeSwitchTest.cpp41
-rw-r--r--llvm/utils/lit/tests/Inputs/shtest-readfile/env.txt2
-rw-r--r--llvm/utils/lit/tests/Inputs/shtest-readfile/lit.cfg1
-rw-r--r--llvm/utils/lit/tests/Inputs/shtest-ulimit-nondarwin/ulimit_unlimited.txt (renamed from llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_unlimited.txt)4
-rw-r--r--llvm/utils/lit/tests/shtest-readfile.py2
-rw-r--r--llvm/utils/lit/tests/shtest-ulimit-nondarwin.py8
-rw-r--r--llvm/utils/lit/tests/shtest-ulimit.py8
-rw-r--r--mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td4
-rw-r--r--mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp37
-rw-r--r--mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp6
-rw-r--r--mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp2
-rw-r--r--mlir/test/Dialect/Linalg/canonicalize.mlir2
-rw-r--r--mlir/test/Dialect/Linalg/generalize-named-ops.mlir22
-rw-r--r--mlir/test/Dialect/Linalg/invalid.mlir10
-rw-r--r--mlir/test/Dialect/Linalg/one-shot-bufferize.mlir2
-rw-r--r--mlir/test/Dialect/Linalg/roundtrip.mlir18
-rw-r--r--mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir2
-rw-r--r--mlir/test/Dialect/Tensor/bufferize.mlir2
-rw-r--r--mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir6
-rw-r--r--mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp2
150 files changed, 5696 insertions, 818 deletions
diff --git a/clang/docs/Modules.rst b/clang/docs/Modules.rst
index acbe45e0be97..e45ee9ff9eac 100644
--- a/clang/docs/Modules.rst
+++ b/clang/docs/Modules.rst
@@ -421,13 +421,7 @@ As an example, the module map file for the C standard library might look a bit l
.. parsed-literal::
- module std [system] [extern_c] {
- module assert {
- textual header "assert.h"
- header "bits/assert-decls.h"
- export *
- }
-
+ module std [system] {
module complex {
header "complex.h"
export *
@@ -440,7 +434,6 @@ As an example, the module map file for the C standard library might look a bit l
module errno {
header "errno.h"
- header "sys/errno.h"
export *
}
@@ -673,14 +666,14 @@ of checking *use-declaration*\s, and must still be a lexically-valid header
file. In the future, we intend to pre-tokenize such headers and include the
token sequence within the prebuilt module representation.
-A header with the ``exclude`` specifier is excluded from the module. It will not be included when the module is built, nor will it be considered to be part of the module, even if an ``umbrella`` header or directory would otherwise make it part of the module.
+A header with the ``exclude`` specifier is excluded from the module. It will not be included when the module is built, nor will it be considered to be part of the module, even if an ``umbrella`` directory would otherwise make it part of the module.
-**Example:** The C header ``assert.h`` is an excellent candidate for a textual header, because it is meant to be included multiple times (possibly with different ``NDEBUG`` settings). However, declarations within it should typically be split into a separate modular header.
+**Example:** A "X macro" header is an excellent candidate for a textual header, because it is can't be compiled standalone, and by itself does not contain any declarations.
.. parsed-literal::
- module std [system] {
- textual header "assert.h"
+ module MyLib [system] {
+ textual header "xmacros.h"
}
A given header shall not be referenced by more than one *header-declaration*.
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index b856ad145824..3a5b72e20afa 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -43,6 +43,7 @@
// SJ -> sigjmp_buf
// K -> ucontext_t
// p -> pid_t
+// e -> _Float16 for HIP/C++ and __fp16 for OpenCL
// . -> "...". This may only occur at the end of the function list.
//
// Types may be prefixed with the following modifiers:
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index f265d82efee7..36cb527a9c80 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -967,6 +967,47 @@ TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f32_f32, "V4fifffQtV4ibii", "n
TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f16_f32, "V4hifffQtV4ibii", "nc", "image-insts")
TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f32_f32, "V4fifffQtV4ibii", "nc", "image-insts")
TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f16_f32, "V4hifffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_1d_v4f32_f32, "V4fifQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_1d_v4f16_f32, "V4eifQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32, "V4fiffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32, "V4eiffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2d_f32_f32, "fiffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2d_v4f32_f32, "V4fiffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2d_v4f16_f32, "V4eiffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2darray_f32_f32, "fifffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_3d_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_3d_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_cube_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_cube_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_1d_v4f32_f32, "V4fiffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_1d_v4f16_f32, "V4eiffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_1darray_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_1darray_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2d_f32_f32, "fifffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2d_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2d_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2darray_f32_f32, "fiffffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2darray_v4f32_f32, "V4fiffffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2darray_v4f16_f32, "V4eiffffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_3d_v4f32_f32, "V4fiffffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_3d_v4f16_f32, "V4eiffffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_cube_v4f32_f32, "V4fiffffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_cube_v4f16_f32, "V4eiffffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_1d_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_1d_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_1darray_v4f32_f32, "V4fiffffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_1darray_v4f16_f32, "V4eiffffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2d_f32_f32, "fiffffffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2d_v4f32_f32, "V4fiffffffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2d_v4f16_f32, "V4eiffffffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2darray_f32_f32, "fifffffffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2darray_v4f32_f32, "V4fifffffffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2darray_v4f16_f32, "V4eifffffffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_3d_v4f32_f32, "V4fifffffffffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_3d_v4f16_f32, "V4eifffffffffQtV4ibii", "nc", "extended-image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32, "V4fiffQtV4ibii", "nc", "extended-image-insts")
#undef BUILTIN
#undef TARGET_BUILTIN
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 687cd46773f4..2669f6245671 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -12403,6 +12403,11 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
// Read the base type.
switch (*Str++) {
default: llvm_unreachable("Unknown builtin type letter!");
+ case 'e':
+ assert(HowLong == 0 && !Signed && !Unsigned &&
+ "Bad modifiers used with 'e'!");
+ Type = Context.getLangOpts().OpenCL ? Context.HalfTy : Context.Float16Ty;
+ break;
case 'x':
assert(HowLong == 0 && !Signed && !Unsigned &&
"Bad modifiers used with 'x'!");
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index f49a5af2c958..9eab70955b6b 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -647,8 +647,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
llvm::Type *ResultType = ConvertType(E->getType());
llvm::Value *Src = EmitScalarExpr(E->getArg(0));
- Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
- return Builder.CreateCall(F, { Src });
+ Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
+ return Builder.CreateCall(F, {Src});
}
case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32:
case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: {
@@ -1139,6 +1139,83 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
return emitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_sample_cube, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_lz_1d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_l_1d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_d_1d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_lz_2d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_l_2d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_d_2d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_lz_3d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_l_3d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_d_3d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_lz_cube, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_l_cube, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_lz_1darray, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_l_1darray, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_d_1darray, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_lz_2darray, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_l_2darray, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_d_2darray, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_gather4_lz_2d, false);
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index 65c324c10ca5..f05c28fd7a12 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -221,7 +221,7 @@ std::string HeaderSearch::getPrebuiltModuleFileName(StringRef ModuleName,
// file.
for (const std::string &Dir : HSOpts.PrebuiltModulePaths) {
SmallString<256> Result(Dir);
- llvm::sys::fs::make_absolute(Result);
+ FileMgr.makeAbsolutePath(Result);
if (ModuleName.contains(':'))
// The separator of C++20 modules partitions (':') is not good for file
// systems, here clang and gcc choose '-' by default since it is not a
@@ -246,7 +246,7 @@ std::string HeaderSearch::getPrebuiltImplicitModuleFileName(Module *Module) {
StringRef ModuleCacheHash = HSOpts.DisableModuleHash ? "" : getModuleHash();
for (const std::string &Dir : HSOpts.PrebuiltModulePaths) {
SmallString<256> CachePath(Dir);
- llvm::sys::fs::make_absolute(CachePath);
+ FileMgr.makeAbsolutePath(CachePath);
llvm::sys::path::append(CachePath, ModuleCacheHash);
std::string FileName =
getCachedModuleFileNameImpl(ModuleName, ModuleMapPath, CachePath);
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index e32f4376a5eb..139c4abc040d 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -153,7 +153,48 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
- case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: {
+ case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32: {
StringRef FeatureList(
getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
if (!Builtin::evaluateRequiredTargetFeatures(FeatureList,
diff --git a/clang/test/CodeGen/builtins-extended-image.c b/clang/test/CodeGen/builtins-extended-image.c
new file mode 100644
index 000000000000..0dbf81dabd77
--- /dev/null
+++ b/clang/test/CodeGen/builtins-extended-image.c
@@ -0,0 +1,1528 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 -target-feature +extended-image-insts %s -emit-llvm -o - | FileCheck %s
+
+typedef int int4 __attribute__((ext_vector_type(4)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef _Float16 half4 __attribute__((ext_vector_type(4)));
+
+// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_r(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_r(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(1, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_g(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 2, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_g(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(2, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_b(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 4, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_b(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(4, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_a(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 8, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_a(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(8, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_1d_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP3]]
+//
+float4 test_amdgcn_image_sample_lz_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_1d_v4f32_f32(100, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_1d_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_amdgcn_image_sample_l_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_1d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_1d_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_amdgcn_image_sample_d_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_1d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_2d_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_amdgcn_image_sample_lz_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_2d_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32.v8i32.v4i32(i32 10, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_amdgcn_image_sample_l_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_2d_v4f32_f32(10, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_2d_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP6]], align 32
+// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP7]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP8]]
+//
+float4 test_amdgcn_image_sample_d_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_2d_v4f32_f32(100, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_3d_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.3d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_amdgcn_image_sample_lz_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_3d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_3d_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.3d.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP6]]
+//
+float4 test_amdgcn_image_sample_l_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_3d_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_3d_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP9]], align 32
+// CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP11:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], float [[TMP7]], float [[TMP8]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP10]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP11]]
+//
+float4 test_amdgcn_image_sample_d_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_3d_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_cube_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.cube.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_amdgcn_image_sample_lz_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_cube_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_cube_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.cube.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP6]]
+//
+float4 test_amdgcn_image_sample_l_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_cube_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_1darray_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_amdgcn_image_sample_lz_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_1darray_v4f32_f32(1, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_1darray_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_amdgcn_image_sample_l_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_1darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_1darray_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1darray.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP6]]
+//
+float4 test_amdgcn_image_sample_d_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_1darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_2darray_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_amdgcn_image_sample_lz_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_2darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_2darray_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP6]]
+//
+float4 test_amdgcn_image_sample_l_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_2darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_2darray_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP7]], align 32
+// CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2darray.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP8]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP9]]
+//
+float4 test_amdgcn_image_sample_d_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_2darray_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_1d_v4f16_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.1d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP3]]
+//
+half4 test_amdgcn_image_sample_lz_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_1d_v4f16_f32(100, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_1d_v4f16_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.1d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_amdgcn_image_sample_l_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_1d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_1d_v4f16_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.1d.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_amdgcn_image_sample_d_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_1d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_2d_v4f16_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_amdgcn_image_sample_lz_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_2d_v4f16_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.2d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_amdgcn_image_sample_l_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_2d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_2d_v4f16_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP6]], align 32
+// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP8:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.2d.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP7]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP8]]
+//
+half4 test_amdgcn_image_sample_d_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_2d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_3d_v4f16_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.3d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_amdgcn_image_sample_lz_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_3d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_3d_v4f16_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.3d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP6]]
+//
+half4 test_amdgcn_image_sample_l_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_3d_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_3d_v4f16_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP9]], align 32
+// CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP11:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.3d.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], float [[TMP7]], float [[TMP8]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP10]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP11]]
+//
+half4 test_amdgcn_image_sample_d_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_3d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_cube_v4f16_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.cube.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_amdgcn_image_sample_lz_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_cube_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_cube_v4f16_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.cube.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP6]]
+//
+half4 test_amdgcn_image_sample_l_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_cube_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_1darray_v4f16_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.1darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_amdgcn_image_sample_lz_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_1darray_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_1darray_v4f16_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.1darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_amdgcn_image_sample_l_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_1darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_1darray_v4f16_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP6]]
+//
+half4 test_amdgcn_image_sample_d_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_1darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_2darray_v4f16_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_amdgcn_image_sample_lz_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_2darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_2darray_v4f16_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.2darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP6]]
+//
+half4 test_amdgcn_image_sample_l_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_2darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_2darray_v4f16_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP7]], align 32
+// CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.2darray.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP8]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP9]]
+//
+half4 test_amdgcn_image_sample_d_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_2darray_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_lz_2d_f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret float [[TMP4]]
+//
+float test_amdgcn_image_sample_lz_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_2d_f32_f32(1, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_l_2d_f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.l.2d.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret float [[TMP5]]
+//
+float test_amdgcn_image_sample_l_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_2d_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_d_2d_f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP6]], align 32
+// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.image.sample.d.2d.f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP7]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret float [[TMP8]]
+//
+float test_amdgcn_image_sample_d_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_2d_f32_f32(1, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_lz_2darray_f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.lz.2darray.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret float [[TMP5]]
+//
+float test_amdgcn_image_sample_lz_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_2darray_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_l_2darray_f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret float [[TMP6]]
+//
+float test_amdgcn_image_sample_l_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_2darray_f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_d_2darray_f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP7]], align 32
+// CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.image.sample.d.2darray.f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP8]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret float [[TMP9]]
+//
+float test_amdgcn_image_sample_d_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_2darray_f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+}
diff --git a/clang/test/SemaOpenCL/builtins-extended-image-param-gfx1100-err.cl b/clang/test/SemaOpenCL/builtins-extended-image-param-gfx1100-err.cl
new file mode 100644
index 000000000000..47dbdd4e5178
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-extended-image-param-gfx1100-err.cl
@@ -0,0 +1,227 @@
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 -target-feature +extended-image-insts -S -verify=expected -o - %s
+// REQUIRES: amdgpu-registered-target
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+typedef int int4 __attribute__((ext_vector_type(4)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_r(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(1, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32' must be a constant integer}}
+}
+
+float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_g(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(2, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32' must be a constant integer}}
+}
+
+float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_b(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(4, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32' must be a constant integer}}
+}
+
+float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_a(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(8, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32' must be a constant integer}}
+}
+
+float4 test_amdgcn_image_sample_lz_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_1d_v4f32_f32(i32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_1d_v4f32_f32' must be a constant integer}}
+}
+
+float4 test_amdgcn_image_sample_l_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_1d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_1d_v4f32_f32' must be a constant integer}}
+}
+
+float4 test_amdgcn_image_sample_d_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_1d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_1d_v4f32_f32' must be a constant integer}}
+}
+
+float4 test_amdgcn_image_sample_lz_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2d_v4f32_f32' must be a constant integer}}
+}
+
+float4 test_amdgcn_image_sample_l_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_2d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, 103); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2d_v4f32_f32' must be a constant integer}}
+}
+
+float4 test_amdgcn_image_sample_d_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_2d_v4f32_f32(i32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2d_v4f32_f32' must be a constant integer}}
+}
+float4 test_amdgcn_image_sample_lz_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_3d_v4f32_f32(i32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_3d_v4f32_f32' must be a constant integer}}
+}
+
+float4 test_amdgcn_image_sample_l_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_3d_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_3d_v4f32_f32' must be a constant integer}}
+}
+
+float4 test_amdgcn_image_sample_d_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_3d_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_3d_v4f32_f32' must be a constant integer}}
+}
+
+float4 test_amdgcn_image_sample_lz_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_cube_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_cube_v4f32_f32' must be a constant integer}}
+}
+
+float4 test_amdgcn_image_sample_l_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_cube_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_cube_v4f32_f32' must be a constant integer}}
+}
+
+float4 test_amdgcn_image_sample_lz_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_1darray_v4f32_f32(1, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32' must be a constant integer}}
+}
+
+float4 test_amdgcn_image_sample_l_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_1darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_1darray_v4f32_f32' must be a constant integer}}
+}
+
+float4 test_amdgcn_image_sample_d_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_1darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_1darray_v4f32_f32' must be a constant integer}}
+}
+
+float4 test_amdgcn_image_sample_lz_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_2darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32' must be a constant integer}}
+}
+
+float4 test_amdgcn_image_sample_l_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_2darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2darray_v4f32_f32' must be a constant integer}}
+}
+
+float4 test_amdgcn_image_sample_d_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_2darray_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2darray_v4f32_f32' must be a constant integer}}
+}
+
+half4 test_amdgcn_image_sample_lz_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_1d_v4f16_f32(23, f32, tex, vec4i32, 0, i32, 11); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_1d_v4f16_f32' must be a constant integer}}
+}
+
+half4 test_amdgcn_image_sample_l_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_1d_v4f16_f32(i32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_1d_v4f16_f32' must be a constant integer}}
+}
+
+half4 test_amdgcn_image_sample_d_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_1d_v4f16_f32(i32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_1d_v4f16_f32' must be a constant integer}}
+}
+
+half4 test_amdgcn_image_sample_lz_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2d_v4f16_f32' must be a constant integer}}
+}
+
+half4 test_amdgcn_image_sample_l_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_2d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2d_v4f16_f32' must be a constant integer}}
+}
+
+half4 test_amdgcn_image_sample_d_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_2d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2d_v4f16_f32' must be a constant integer}}
+}
+
+half4 test_amdgcn_image_sample_lz_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_3d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_3d_v4f16_f32' must be a constant integer}}
+}
+
+half4 test_amdgcn_image_sample_l_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_3d_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_3d_v4f16_f32' must be a constant integer}}
+}
+
+half4 test_amdgcn_image_sample_d_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_3d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_3d_v4f16_f32' must be a constant integer}}
+}
+
+half4 test_amdgcn_image_sample_lz_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_cube_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_cube_v4f16_f32' must be a constant integer}}
+}
+
+half4 test_amdgcn_image_sample_l_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_cube_v4f16_f32(i32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_cube_v4f16_f32' must be a constant integer}}
+}
+
+half4 test_amdgcn_image_sample_lz_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_1darray_v4f16_f32(i32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32' must be a constant integer}}
+}
+
+half4 test_amdgcn_image_sample_l_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_1darray_v4f16_f32(i32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_1darray_v4f16_f32' must be a constant integer}}
+}
+
+half4 test_amdgcn_image_sample_d_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_1darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_1darray_v4f16_f32' must be a constant integer}}
+}
+
+half4 test_amdgcn_image_sample_lz_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_2darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32' must be a constant integer}}
+}
+
+half4 test_amdgcn_image_sample_l_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_2darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2darray_v4f16_f32' must be a constant integer}}
+}
+
+half4 test_amdgcn_image_sample_d_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_2darray_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2darray_v4f16_f32' must be a constant integer}}
+}
+
+float test_amdgcn_image_sample_lz_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_2d_f32_f32(1, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2d_f32_f32' must be a constant integer}}
+}
+
+float test_amdgcn_image_sample_l_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_2d_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2d_f32_f32' must be a constant integer}}
+}
+
+float test_amdgcn_image_sample_d_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_2d_f32_f32(1, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2d_f32_f32' must be a constant integer}}
+}
+
+float test_amdgcn_image_sample_lz_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_2darray_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2darray_f32_f32' must be a constant integer}}
+}
+
+float test_amdgcn_image_sample_l_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_2darray_f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2darray_f32_f32' must be a constant integer}}
+}
+
+float test_amdgcn_image_sample_d_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_2darray_f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2darray_f32_f32' must be a constant integer}}
+}
diff --git a/clang/test/SemaOpenCL/builtins-extended-image-param-gfx942-err.cl b/clang/test/SemaOpenCL/builtins-extended-image-param-gfx942-err.cl
new file mode 100644
index 000000000000..e60f8c70dc7c
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-extended-image-param-gfx942-err.cl
@@ -0,0 +1,227 @@
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx942 -verify=GFX94 -S -o - %s
+// REQUIRES: amdgpu-registered-target
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+typedef int int4 __attribute__((ext_vector_type(4)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_r(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(1, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_gather4_lz_2d_v4f32_f32_r' needs target feature extended-image-insts}}
+}
+
+float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_g(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(2, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_gather4_lz_2d_v4f32_f32_g' needs target feature extended-image-insts}}
+}
+
+float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_b(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(4, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_gather4_lz_2d_v4f32_f32_b' needs target feature extended-image-insts}}
+}
+
+float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_a(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(8, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_gather4_lz_2d_v4f32_f32_a' needs target feature extended-image-insts}}
+}
+
+float4 test_amdgcn_image_sample_lz_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_1d_v4f32_f32(105, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_1d_v4f32_f32' needs target feature extended-image-insts}}
+}
+
+float4 test_amdgcn_image_sample_l_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_1d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_1d_v4f32_f32' needs target feature extended-image-insts}}
+}
+
+float4 test_amdgcn_image_sample_d_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_1d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_1d_v4f32_f32' needs target feature extended-image-insts}}
+}
+
+float4 test_amdgcn_image_sample_lz_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2d_v4f32_f32' needs target feature extended-image-insts}}
+}
+
+float4 test_amdgcn_image_sample_l_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_2d_v4f32_f32(10, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2d_v4f32_f32' needs target feature extended-image-insts}}
+}
+
+float4 test_amdgcn_image_sample_d_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_2d_v4f32_f32(105, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2d_v4f32_f32' needs target feature extended-image-insts}}
+}
+float4 test_amdgcn_image_sample_lz_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_3d_v4f32_f32(105, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_3d_v4f32_f32' needs target feature extended-image-insts}}
+}
+
+float4 test_amdgcn_image_sample_l_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_3d_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_3d_v4f32_f32' needs target feature extended-image-insts}}
+}
+
+float4 test_amdgcn_image_sample_d_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_3d_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_3d_v4f32_f32' needs target feature extended-image-insts}}
+}
+
+float4 test_amdgcn_image_sample_lz_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_cube_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_cube_v4f32_f32' needs target feature extended-image-insts}}
+}
+
+float4 test_amdgcn_image_sample_l_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_cube_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_cube_v4f32_f32' needs target feature extended-image-insts}}
+}
+
+float4 test_amdgcn_image_sample_lz_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_1darray_v4f32_f32(1, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_1darray_v4f32_f32' needs target feature extended-image-insts}}
+}
+
+float4 test_amdgcn_image_sample_l_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_1darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_1darray_v4f32_f32' needs target feature extended-image-insts}}
+}
+
+float4 test_amdgcn_image_sample_d_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_1darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_1darray_v4f32_f32' needs target feature extended-image-insts}}
+}
+
+float4 test_amdgcn_image_sample_lz_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_2darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2darray_v4f32_f32' needs target feature extended-image-insts}}
+}
+
+float4 test_amdgcn_image_sample_l_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_2darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2darray_v4f32_f32' needs target feature extended-image-insts}}
+}
+
+float4 test_amdgcn_image_sample_d_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_2darray_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2darray_v4f32_f32' needs target feature extended-image-insts}}
+}
+
+half4 test_amdgcn_image_sample_lz_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_1d_v4f16_f32(105, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_1d_v4f16_f32' needs target feature extended-image-insts}}
+}
+
+half4 test_amdgcn_image_sample_l_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_1d_v4f16_f32(105, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_1d_v4f16_f32' needs target feature extended-image-insts}}
+}
+
+half4 test_amdgcn_image_sample_d_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_1d_v4f16_f32(105, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_1d_v4f16_f32' needs target feature extended-image-insts}}
+}
+
+half4 test_amdgcn_image_sample_lz_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2d_v4f16_f32' needs target feature extended-image-insts}}
+}
+
+half4 test_amdgcn_image_sample_l_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_2d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2d_v4f16_f32' needs target feature extended-image-insts}}
+}
+
+half4 test_amdgcn_image_sample_d_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_2d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2d_v4f16_f32' needs target feature extended-image-insts}}
+}
+
+half4 test_amdgcn_image_sample_lz_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_3d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_3d_v4f16_f32' needs target feature extended-image-insts}}
+}
+
+half4 test_amdgcn_image_sample_l_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_3d_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_3d_v4f16_f32' needs target feature extended-image-insts}}
+}
+
+half4 test_amdgcn_image_sample_d_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_3d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_3d_v4f16_f32' needs target feature extended-image-insts}}
+}
+
+half4 test_amdgcn_image_sample_lz_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_cube_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_cube_v4f16_f32' needs target feature extended-image-insts}}
+}
+
+half4 test_amdgcn_image_sample_l_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_cube_v4f16_f32(105, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_cube_v4f16_f32' needs target feature extended-image-insts}}
+}
+
+half4 test_amdgcn_image_sample_lz_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_1darray_v4f16_f32(105, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_1darray_v4f16_f32' needs target feature extended-image-insts}}
+}
+
+half4 test_amdgcn_image_sample_l_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_1darray_v4f16_f32(105, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_1darray_v4f16_f32' needs target feature extended-image-insts}}
+}
+
+half4 test_amdgcn_image_sample_d_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_1darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_1darray_v4f16_f32' needs target feature extended-image-insts}}
+}
+
+half4 test_amdgcn_image_sample_lz_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_2darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2darray_v4f16_f32' needs target feature extended-image-insts}}
+}
+
+half4 test_amdgcn_image_sample_l_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_2darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2darray_v4f16_f32' needs target feature extended-image-insts}}
+}
+
+half4 test_amdgcn_image_sample_d_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_2darray_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2darray_v4f16_f32' needs target feature extended-image-insts}}
+}
+
+float test_amdgcn_image_sample_lz_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_2d_f32_f32(1, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2d_f32_f32' needs target feature extended-image-insts}}
+}
+
+float test_amdgcn_image_sample_l_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_2d_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2d_f32_f32' needs target feature extended-image-insts}}
+}
+
+float test_amdgcn_image_sample_d_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_2d_f32_f32(1, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2d_f32_f32' needs target feature extended-image-insts}}
+}
+
+float test_amdgcn_image_sample_lz_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_lz_2darray_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2darray_f32_f32' needs target feature extended-image-insts}}
+}
+
+float test_amdgcn_image_sample_l_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_l_2darray_f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2darray_f32_f32' needs target feature extended-image-insts}}
+}
+
+float test_amdgcn_image_sample_d_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_d_2darray_f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2darray_f32_f32' needs target feature extended-image-insts}}
+}
diff --git a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate-warnings.c b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate-warnings.c
index 5069c6340b64..25022f241a6d 100644
--- a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate-warnings.c
+++ b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate-warnings.c
@@ -1,6 +1,6 @@
// Disable full debug info and verify that we get warnings during merging
-// RUN: %clang_pgogen -o %t -gline-tables-only -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
+// RUN: %clang_pgogen -o %t -gline-tables-only -mllvm --profile-correlate=debug-info -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
// RUN: env LLVM_PROFILE_FILE=%t.proflite %run %t
// RUN: llvm-profdata merge -o %t.profdata --debug-info=%t %t.proflite --max-debug-info-correlation-warnings=2 2>&1 >/dev/null | FileCheck %s --check-prefixes=CHECK,LIMIT --implicit-check-not=warning
// RUN: llvm-profdata merge -o %t.profdata --debug-info=%t %t.proflite --max-debug-info-correlation-warnings=0 2>&1 >/dev/null | FileCheck %s --check-prefixes=CHECK,NOLIMIT --implicit-check-not=warning
diff --git a/flang/include/flang/Semantics/dump-expr.h b/flang/include/flang/Semantics/dump-expr.h
index 2dbd4cb60be5..5a78e13b19e5 100644
--- a/flang/include/flang/Semantics/dump-expr.h
+++ b/flang/include/flang/Semantics/dump-expr.h
@@ -48,10 +48,11 @@ private:
// "... [with T = xyz; std::string_view = ...]"
#ifdef __clang__
std::string_view front("[T = ");
+ std::string_view back("]");
#else
std::string_view front("[with T = ");
-#endif
std::string_view back("; std::string_view =");
+#endif
#elif defined(_MSC_VER)
#define DUMP_EXPR_SHOW_TYPE
diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp
index 4739da0676fa..fd69404f313d 100644
--- a/flang/lib/Parser/prescan.cpp
+++ b/flang/lib/Parser/prescan.cpp
@@ -557,7 +557,7 @@ bool Prescanner::MustSkipToEndOfLine() const {
return true; // skip over ignored columns in right margin (73:80)
} else if (*at_ == '!' && !inCharLiteral_ &&
(!inFixedForm_ || tabInCurrentLine_ || column_ != 6)) {
- return !IsCompilerDirectiveSentinel(at_);
+ return !IsCompilerDirectiveSentinel(at_ + 1);
} else {
return false;
}
diff --git a/flang/test/Parser/inline-directives.f90 b/flang/test/Parser/inline-directives.f90
new file mode 100644
index 000000000000..24d4f95759a6
--- /dev/null
+++ b/flang/test/Parser/inline-directives.f90
@@ -0,0 +1,29 @@
+! RUN: %flang_fc1 -fdebug-unparse %s 2>&1 | FileCheck %s
+
+! Test that checks whether compiler directives can be inlined without mistaking it as comment.
+
+module m
+contains
+#define MACRO(X) subroutine func1(X); real(2) :: X; !dir$ ignore_tkr(d) X; end subroutine func1;
+MACRO(foo)
+
+!CHECK: SUBROUTINE func1 (foo)
+!CHECK: !DIR$ IGNORE_TKR (d) foo
+!CHECK: END SUBROUTINE func1
+
+ subroutine func2(foo)
+ real(2) :: foo; !dir$ ignore_tkr(d) foo;
+ end subroutine func2
+
+!CHECK: SUBROUTINE func2 (foo)
+!CHECK: !DIR$ IGNORE_TKR (d) foo
+!CHECK: END SUBROUTINE func2
+
+ subroutine func3(foo)
+ real(2) :: foo; !dir$ ignore_tkr(d) foo; end subroutine func3;
+
+!CHECK: SUBROUTINE func3 (foo)
+!CHECK: !DIR$ IGNORE_TKR (d) foo
+!CHECK: END SUBROUTINE func3
+
+end module
diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt
index 14718e2090bd..ae555a256ba6 100644
--- a/libc/CMakeLists.txt
+++ b/libc/CMakeLists.txt
@@ -363,7 +363,7 @@ elseif(LLVM_LIBC_FULL_BUILD)
message(FATAL_ERROR "${LIBC_CONFIG_PATH}/headers.txt file not found and fullbuild requested.")
endif()
-# Check exclude.txt that appends to LIBC_EXCLUDE_ENTRYPOINTS list
+# Check exclude.txt that appends to TARGET_LLVMLIBC_REMOVED_ENTRYPOINTS list
if(EXISTS "${LIBC_CONFIG_PATH}/exclude.txt")
include("${LIBC_CONFIG_PATH}/exclude.txt")
endif()
diff --git a/libc/config/linux/x86_64/exclude.txt b/libc/config/linux/x86_64/exclude.txt
index 2c218b753b17..a0686310d21a 100644
--- a/libc/config/linux/x86_64/exclude.txt
+++ b/libc/config/linux/x86_64/exclude.txt
@@ -19,3 +19,11 @@ if(NOT has_sys_random)
)
endif()
endif()
+
+include(CheckSymbolExists)
+check_symbol_exists(SYS_faccessat2 "sys/syscall.h" HAVE_SYS_FACCESSAT2)
+if(NOT HAVE_SYS_FACCESSAT2)
+ list(APPEND TARGET_LLVMLIBC_REMOVED_ENTRYPOINTS
+ libc.src.unistd.faccessat
+ )
+endif()
diff --git a/libc/include/locale.yaml b/libc/include/locale.yaml
index 4566984ad83a..3c3998eb07aa 100644
--- a/libc/include/locale.yaml
+++ b/libc/include/locale.yaml
@@ -1,7 +1,7 @@
header: locale.h
header_template: locale.h.def
macros:
- - macro_name: NULL
+ - macro_name: "NULL"
macro_header: null-macro.h
types:
- type_name: locale_t
diff --git a/libc/include/stdio.yaml b/libc/include/stdio.yaml
index 394437ba3bbc..c50b4ecb0bf0 100644
--- a/libc/include/stdio.yaml
+++ b/libc/include/stdio.yaml
@@ -1,7 +1,7 @@
header: stdio.h
header_template: stdio.h.def
macros:
- - macro_name: NULL
+ - macro_name: "NULL"
macro_header: null-macro.h
- macro_name: stdout
macro_value: stdout
diff --git a/libc/include/stdlib.yaml b/libc/include/stdlib.yaml
index 3b2ff13c684b..495eb7e1317b 100644
--- a/libc/include/stdlib.yaml
+++ b/libc/include/stdlib.yaml
@@ -5,7 +5,7 @@ standards:
merge_yaml_files:
- stdlib-malloc.yaml
macros:
- - macro_name: NULL
+ - macro_name: "NULL"
macro_header: null-macro.h
types:
- type_name: __atexithandler_t
diff --git a/libc/include/string.yaml b/libc/include/string.yaml
index 0bf297ee747a..22010f4afa81 100644
--- a/libc/include/string.yaml
+++ b/libc/include/string.yaml
@@ -2,7 +2,7 @@ header: string.h
standards:
- stdc
macros:
- - macro_name: NULL
+ - macro_name: "NULL"
macro_header: null-macro.h
types:
- type_name: locale_t
diff --git a/libc/include/time.yaml b/libc/include/time.yaml
index 2f8024298fad..88e50d128823 100644
--- a/libc/include/time.yaml
+++ b/libc/include/time.yaml
@@ -1,7 +1,7 @@
header: time.h
header_template: time.h.def
macros:
- - macro_name: NULL
+ - macro_name: "NULL"
macro_header: null-macro.h
types:
- type_name: struct_timeval
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index b8a0a748cd3a..c8b9e21b56b2 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -1,7 +1,7 @@
header: wchar.h
header_template: wchar.h.def
macros:
- - macro_name: NULL
+ - macro_name: "NULL"
macro_header: null-macro.h
types:
- type_name: FILE
@@ -188,8 +188,8 @@ functions:
standards:
- stdc
return_type: wchar_t *
- arguments:
- - type: wchar_t *__restrict
+ arguments:
+ - type: wchar_t *__restrict
- type: const wchar_t *__restrict
- type: size_t
- name: wmemmove
@@ -212,7 +212,7 @@ functions:
standards:
- stdc
return_type: wchar_t *
- arguments:
+ arguments:
- type: wchar_t *__restrict
- type: const wchar_t *__restrict
- name: wcslcat
diff --git a/libc/src/time/strftime.cpp b/libc/src/time/strftime.cpp
index f36091bc9736..89b7d9bb7c1b 100644
--- a/libc/src/time/strftime.cpp
+++ b/libc/src/time/strftime.cpp
@@ -26,7 +26,7 @@ LLVM_LIBC_FUNCTION(size_t, strftime,
int ret = strftime_core::strftime_main(&writer, format, timeptr);
if (buffsz > 0) // if the buffsz is 0 the buffer may be a null pointer.
wb.buff[wb.buff_cur] = '\0';
- return (ret < 0 || static_cast<size_t>(ret) > buffsz) ? 0 : ret;
+ return (ret < 0 || static_cast<size_t>(ret) >= buffsz) ? 0 : ret;
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/time/strftime_l.cpp b/libc/src/time/strftime_l.cpp
index 201b85da39ee..409f8683b728 100644
--- a/libc/src/time/strftime_l.cpp
+++ b/libc/src/time/strftime_l.cpp
@@ -29,7 +29,7 @@ LLVM_LIBC_FUNCTION(size_t, strftime_l,
int ret = strftime_core::strftime_main(&writer, format, timeptr);
if (buffsz > 0) // if the buffsz is 0 the buffer may be a null pointer.
wb.buff[wb.buff_cur] = '\0';
- return (ret < 0 || static_cast<size_t>(ret) > buffsz) ? 0 : ret;
+ return (ret < 0 || static_cast<size_t>(ret) >= buffsz) ? 0 : ret;
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/time/strftime_test.cpp b/libc/test/src/time/strftime_test.cpp
index cac7560b2b94..38176f77804d 100644
--- a/libc/test/src/time/strftime_test.cpp
+++ b/libc/test/src/time/strftime_test.cpp
@@ -2326,3 +2326,23 @@ TEST(LlvmLibcStrftimeTest, TimeFormatFullDateTime) {
// size_t written = 0;
// SimplePaddedNum spn;
// }
+
+TEST(LlvmLibcStrftimeTest, BufferTooSmall) {
+ struct tm time;
+ char buffer[1];
+
+ time.tm_year = get_adjusted_year(2025);
+ time.tm_mon = 10;
+ time.tm_mday = 24;
+
+ size_t written =
+ LIBC_NAMESPACE::strftime(buffer, sizeof(buffer), "%F", &time);
+ EXPECT_EQ(written, size_t{0});
+
+ char buffer2[10];
+
+ // The string "2025-11-24" is 10 chars,
+ // so strftime needs 10 + 1 bytes to write the string and the null terminator.
+ written = LIBC_NAMESPACE::strftime(buffer, sizeof(buffer2), "%F", &time);
+ EXPECT_EQ(written, size_t{0});
+}
diff --git a/libc/utils/hdrgen/hdrgen/enumeration.py b/libc/utils/hdrgen/hdrgen/enumeration.py
index 198720826720..1e0f64aec1ed 100644
--- a/libc/utils/hdrgen/hdrgen/enumeration.py
+++ b/libc/utils/hdrgen/hdrgen/enumeration.py
@@ -6,24 +6,14 @@
#
# ==-------------------------------------------------------------------------==#
-from functools import total_ordering
+from hdrgen.symbol import Symbol
-@total_ordering
-class Enumeration:
+class Enumeration(Symbol):
def __init__(self, name, value):
- self.name = name
+ super().__init__(name)
self.value = value
- def __eq__(self, other):
- return self.name == other.name
-
- def __lt__(self, other):
- return self.name < other.name
-
- def __hash__(self):
- return self.name.__hash__()
-
def __str__(self):
if self.value != None:
return f"{self.name} = {self.value}"
diff --git a/libc/utils/hdrgen/hdrgen/function.py b/libc/utils/hdrgen/hdrgen/function.py
index f039996584e3..4de3406cc408 100644
--- a/libc/utils/hdrgen/hdrgen/function.py
+++ b/libc/utils/hdrgen/hdrgen/function.py
@@ -7,7 +7,7 @@
# ==-------------------------------------------------------------------------==#
import re
-from functools import total_ordering
+from hdrgen.symbol import Symbol
from hdrgen.type import Type
@@ -37,14 +37,13 @@ KEYWORDS = [
NONIDENTIFIER = re.compile("[^a-zA-Z0-9_]+")
-@total_ordering
-class Function:
+class Function(Symbol):
def __init__(
self, return_type, name, arguments, standards, guard=None, attributes=[]
):
+ super().__init__(name)
assert return_type
self.return_type = return_type
- self.name = name
self.arguments = [
arg if isinstance(arg, str) else arg["type"] for arg in arguments
]
@@ -53,15 +52,6 @@ class Function:
self.guard = guard
self.attributes = attributes or []
- def __eq__(self, other):
- return self.name == other.name
-
- def __lt__(self, other):
- return self.name < other.name
-
- def __hash__(self):
- return self.name.__hash__()
-
def signature_types(self):
def collapse(type_string):
assert type_string
diff --git a/libc/utils/hdrgen/hdrgen/header.py b/libc/utils/hdrgen/hdrgen/header.py
index 715d4b7c9b7e..f592327f06ad 100644
--- a/libc/utils/hdrgen/hdrgen/header.py
+++ b/libc/utils/hdrgen/hdrgen/header.py
@@ -35,6 +35,13 @@ NONIDENTIFIER = re.compile("[^a-zA-Z0-9_]+")
COMMON_HEADER = PurePosixPath("__llvm-libc-common.h")
+# These "attributes" are known macros defined in COMMON_HEADER.
+# Others are found in "llvm-libc-macros/{name}.h".
+COMMON_ATTRIBUTES = {
+ "_Noreturn",
+ "_Returns_twice",
+}
+
# All the canonical identifiers are in lowercase for easy maintenance.
# This maps them to the pretty descriptions to generate in header comments.
LIBRARY_DESCRIPTIONS = {
@@ -50,9 +57,7 @@ LIBRARY_DESCRIPTIONS = {
HEADER_TEMPLATE = """\
//===-- {library} header <{header}> --===//
//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+{license_lines}
//
//===---------------------------------------------------------------------===//
@@ -64,6 +69,12 @@ HEADER_TEMPLATE = """\
#endif // {guard}
"""
+LLVM_LICENSE_TEXT = [
+ "Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.",
+ "See https://llvm.org/LICENSE.txt for license information.",
+ "SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception",
+]
+
class HeaderFile:
def __init__(self, name):
@@ -74,8 +85,10 @@ class HeaderFile:
self.enumerations = []
self.objects = []
self.functions = []
+ self.extra_standards = {}
self.standards = []
self.merge_yaml_files = []
+ self.license_text = []
def add_macro(self, macro):
self.macros.append(macro)
@@ -98,6 +111,11 @@ class HeaderFile:
self.enumerations = sorted(set(self.enumerations) | set(other.enumerations))
self.objects = sorted(set(self.objects) | set(other.objects))
self.functions = sorted(set(self.functions) | set(other.functions))
+ self.extra_standards |= other.extra_standards
+ if self.license_text:
+ assert not other.license_text, "only one `license_text` allowed"
+ else:
+ self.license_text = other.license_text
def all_types(self):
return reduce(
@@ -106,6 +124,13 @@ class HeaderFile:
set(self.types),
)
+ def all_attributes(self):
+ return reduce(
+ lambda a, b: a | b,
+ [set(f.attributes) for f in self.functions],
+ set(),
+ )
+
def all_standards(self):
# FIXME: Only functions have the "standard" field, but all the entity
# types should have one too.
@@ -114,16 +139,24 @@ class HeaderFile:
)
def includes(self):
- return {
- PurePosixPath("llvm-libc-macros") / macro.header
- for macro in self.macros
- if macro.header is not None
- } | {
- COMPILER_HEADER_TYPES.get(
- typ.type_name, PurePosixPath("llvm-libc-types") / f"{typ.type_name}.h"
- )
- for typ in self.all_types()
- }
+ return (
+ {
+ PurePosixPath("llvm-libc-macros") / macro.header
+ for macro in self.macros
+ if macro.header is not None
+ }
+ | {
+ COMPILER_HEADER_TYPES.get(
+ typ.name,
+ PurePosixPath("llvm-libc-types") / f"{typ.name}.h",
+ )
+ for typ in self.all_types()
+ }
+ | {
+ PurePosixPath("llvm-libc-macros") / f"{attr}.h"
+ for attr in self.all_attributes() - COMMON_ATTRIBUTES
+ }
+ )
def header_guard(self):
return "_LLVM_LIBC_" + "_".join(
@@ -131,24 +164,29 @@ class HeaderFile:
)
def library_description(self):
+ descriptions = LIBRARY_DESCRIPTIONS | self.extra_standards
# If the header itself is in standard C, just call it that.
if "stdc" in self.standards:
- return LIBRARY_DESCRIPTIONS["stdc"]
+ return descriptions["stdc"]
# If the header itself is in POSIX, just call it that.
if "posix" in self.standards:
- return LIBRARY_DESCRIPTIONS["posix"]
+ return descriptions["posix"]
# Otherwise, consider the standards for each symbol as well.
standards = self.all_standards()
# Otherwise, it's described by all those that apply, but ignoring
# "stdc" and "posix" since this is not a "stdc" or "posix" header.
return " / ".join(
sorted(
- LIBRARY_DESCRIPTIONS[standard]
+ descriptions[standard]
for standard in standards
if standard not in {"stdc", "posix"}
)
)
+ def license_lines(self):
+ lines = self.license_text or LLVM_LICENSE_TEXT
+ return "\n".join([f"// {line}" for line in lines])
+
def template(self, dir, files_read):
if self.template_file is not None:
# There's a custom template file, so just read it in and record
@@ -162,6 +200,7 @@ class HeaderFile:
library=self.library_description(),
header=self.name,
guard=self.header_guard(),
+ license_lines=self.license_lines(),
)
def public_api(self):
@@ -188,7 +227,7 @@ class HeaderFile:
)
]
- for macro in self.macros:
+ for macro in sorted(self.macros):
# When there is nothing to define, the Macro object converts to str
# as an empty string. Don't emit a blank line for those cases.
if str(macro):
@@ -203,7 +242,12 @@ class HeaderFile:
content.append("\n__BEGIN_C_DECLS\n")
current_guard = None
- for function in self.functions:
+ last_name = None
+ for function in sorted(self.functions):
+ # If the last function's name was the same after underscores,
+ # elide the blank line between the declarations.
+ if last_name == function.name_without_underscores():
+ content.pop()
if function.guard == None and current_guard == None:
content.append(str(function) + " __NOEXCEPT;")
content.append("")
@@ -225,6 +269,7 @@ class HeaderFile:
content.append(f"#ifdef {current_guard}")
content.append(str(function) + " __NOEXCEPT;")
content.append("")
+ last_name = function.name_without_underscores()
if current_guard != None:
content.pop()
content.append(f"#endif // {current_guard}")
diff --git a/libc/utils/hdrgen/hdrgen/macro.py b/libc/utils/hdrgen/hdrgen/macro.py
index e42e82845694..4664d9fb0049 100644
--- a/libc/utils/hdrgen/hdrgen/macro.py
+++ b/libc/utils/hdrgen/hdrgen/macro.py
@@ -6,25 +6,15 @@
#
# ==-------------------------------------------------------------------------==#
-from functools import total_ordering
+from hdrgen.symbol import Symbol
-@total_ordering
-class Macro:
+class Macro(Symbol):
def __init__(self, name, value=None, header=None):
- self.name = name
+ super().__init__(name)
self.value = value
self.header = header
- def __eq__(self, other):
- return self.name == other.name
-
- def __lt__(self, other):
- return self.name < other.name
-
- def __hash__(self):
- return self.name.__hash__()
-
def __str__(self):
if self.header != None:
return ""
diff --git a/libc/utils/hdrgen/hdrgen/main.py b/libc/utils/hdrgen/hdrgen/main.py
index 25df41e506a1..c12e89ef771d 100755
--- a/libc/utils/hdrgen/hdrgen/main.py
+++ b/libc/utils/hdrgen/hdrgen/main.py
@@ -105,6 +105,7 @@ def main():
return 2
header.merge(merge_from_header)
+ assert header.name, f"`header: name.h` line is required in {yaml_file}"
return header
if args.json:
diff --git a/libc/utils/hdrgen/hdrgen/object.py b/libc/utils/hdrgen/hdrgen/object.py
index a311c37168d6..a2ab496bed01 100644
--- a/libc/utils/hdrgen/hdrgen/object.py
+++ b/libc/utils/hdrgen/hdrgen/object.py
@@ -6,23 +6,13 @@
#
# ==-------------------------------------------------------------------------==#
-from functools import total_ordering
+from hdrgen.symbol import Symbol
-@total_ordering
-class Object:
+class Object(Symbol):
def __init__(self, name, type):
- self.name = name
+ super().__init__(name)
self.type = type
- def __eq__(self, other):
- return self.name == other.name
-
- def __lt__(self, other):
- return self.name < other.name
-
- def __hash__(self):
- return self.name.__hash__()
-
def __str__(self):
return f"extern {self.type} {self.name};"
diff --git a/libc/utils/hdrgen/hdrgen/symbol.py b/libc/utils/hdrgen/hdrgen/symbol.py
new file mode 100644
index 000000000000..28e9def128e4
--- /dev/null
+++ b/libc/utils/hdrgen/hdrgen/symbol.py
@@ -0,0 +1,41 @@
+# ====-- Symbol class for libc function headers----------------*- python -*--==#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# ==-------------------------------------------------------------------------==#
+
+from functools import total_ordering
+
+
+@total_ordering
+class Symbol:
+ """
+ Symbol is the common superclass for each kind of entity named by an
+ identifier. It provides the name field, and defines sort ordering,
+ hashing, and equality based only on the name. The sorting is pretty
+ presentation order for identifiers, which is to say it first sorts
+ lexically but ignores leading underscores and secondarily sorts with the
+ fewest underscores first.
+ """
+
+ def __init__(self, name):
+ assert name
+ self.name = name
+
+ def __eq__(self, other):
+ return self.name == other.name
+
+ def __hash__(self):
+ return self.name.__hash__()
+
+ def name_without_underscores(self):
+ return self.name.lstrip("_")
+
+ def name_sort_key(self):
+ ident = self.name_without_underscores()
+ return ident, len(self.name) - len(ident)
+
+ def __lt__(self, other):
+ return self.name_sort_key() < other.name_sort_key()
diff --git a/libc/utils/hdrgen/hdrgen/type.py b/libc/utils/hdrgen/hdrgen/type.py
index 0c0af8569c61..20c1881a9379 100644
--- a/libc/utils/hdrgen/hdrgen/type.py
+++ b/libc/utils/hdrgen/hdrgen/type.py
@@ -6,20 +6,10 @@
#
# ==-------------------------------------------------------------------------==#
-from functools import total_ordering
+from hdrgen.symbol import Symbol
-@total_ordering
-class Type:
- def __init__(self, type_name):
- assert type_name
- self.type_name = type_name
-
- def __eq__(self, other):
- return self.type_name == other.type_name
-
- def __lt__(self, other):
- return self.type_name < other.type_name
-
- def __hash__(self):
- return self.type_name.__hash__()
+class Type(Symbol):
+ # A type so far carries no specific information beyond its name.
+ def __init__(self, name):
+ super().__init__(name)
diff --git a/libc/utils/hdrgen/hdrgen/yaml_to_classes.py b/libc/utils/hdrgen/hdrgen/yaml_to_classes.py
index ebe7781d449f..9eddbe615cbb 100644
--- a/libc/utils/hdrgen/hdrgen/yaml_to_classes.py
+++ b/libc/utils/hdrgen/hdrgen/yaml_to_classes.py
@@ -37,6 +37,8 @@ def yaml_to_classes(yaml_data, header_class, entry_points=None):
header = header_class(header_name)
header.template_file = yaml_data.get("header_template")
header.standards = yaml_data.get("standards", [])
+ header.extra_standards = yaml_data.get("extra_standards", {})
+ header.license_text = yaml_data.get("license_text", [])
header.merge_yaml_files = yaml_data.get("merge_yaml_files", [])
for macro_data in yaml_data.get("macros", []):
diff --git a/libc/utils/hdrgen/tests/expected_output/custom.h b/libc/utils/hdrgen/tests/expected_output/custom.h
new file mode 100644
index 000000000000..5f9ed231490f
--- /dev/null
+++ b/libc/utils/hdrgen/tests/expected_output/custom.h
@@ -0,0 +1,21 @@
+//===-- Wile E. Coyote header <custom.h> --===//
+//
+// Caveat emptor.
+// I never studied law.
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef _LLVM_LIBC_CUSTOM_H
+#define _LLVM_LIBC_CUSTOM_H
+
+#include "__llvm-libc-common.h"
+#include "llvm-libc-types/meep.h"
+#include "llvm-libc-types/road.h"
+
+__BEGIN_C_DECLS
+
+road runner(meep, meep) __NOEXCEPT;
+
+__END_C_DECLS
+
+#endif // _LLVM_LIBC_CUSTOM_H
diff --git a/libc/utils/hdrgen/tests/expected_output/sorting.h b/libc/utils/hdrgen/tests/expected_output/sorting.h
new file mode 100644
index 000000000000..a091a421b2c3
--- /dev/null
+++ b/libc/utils/hdrgen/tests/expected_output/sorting.h
@@ -0,0 +1,24 @@
+//===-- Standard C header <sorting.h> --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef _LLVM_LIBC_SORTING_H
+#define _LLVM_LIBC_SORTING_H
+
+#include "__llvm-libc-common.h"
+
+__BEGIN_C_DECLS
+
+void func_with_aliases(int) __NOEXCEPT;
+void _func_with_aliases(int) __NOEXCEPT;
+void __func_with_aliases(int) __NOEXCEPT;
+
+void gunk(const char *) __NOEXCEPT;
+
+__END_C_DECLS
+
+#endif // _LLVM_LIBC_SORTING_H
diff --git a/libc/utils/hdrgen/tests/expected_output/test_header.h b/libc/utils/hdrgen/tests/expected_output/test_header.h
index 748c09808c12..49112a353f7b 100644
--- a/libc/utils/hdrgen/tests/expected_output/test_header.h
+++ b/libc/utils/hdrgen/tests/expected_output/test_header.h
@@ -12,6 +12,7 @@
#include "__llvm-libc-common.h"
#include "llvm-libc-macros/float16-macros.h"
+#include "llvm-libc-macros/CONST_FUNC_A.h"
#include "llvm-libc-macros/test_more-macros.h"
#include "llvm-libc-macros/test_small-macros.h"
#include "llvm-libc-types/float128.h"
diff --git a/libc/utils/hdrgen/tests/expected_output/test_small.json b/libc/utils/hdrgen/tests/expected_output/test_small.json
index 9cc73d013a67..8502df23b9a4 100644
--- a/libc/utils/hdrgen/tests/expected_output/test_small.json
+++ b/libc/utils/hdrgen/tests/expected_output/test_small.json
@@ -4,6 +4,7 @@
"standards": [],
"includes": [
"__llvm-libc-common.h",
+ "llvm-libc-macros/CONST_FUNC_A.h",
"llvm-libc-macros/test_more-macros.h",
"llvm-libc-macros/test_small-macros.h",
"llvm-libc-types/float128.h",
diff --git a/libc/utils/hdrgen/tests/input/custom-common.yaml b/libc/utils/hdrgen/tests/input/custom-common.yaml
new file mode 100644
index 000000000000..909a3ba5163a
--- /dev/null
+++ b/libc/utils/hdrgen/tests/input/custom-common.yaml
@@ -0,0 +1,6 @@
+license_text:
+ - Caveat emptor.
+ - I never studied law.
+
+extra_standards:
+ acme: Wile E. Coyote
diff --git a/libc/utils/hdrgen/tests/input/custom.yaml b/libc/utils/hdrgen/tests/input/custom.yaml
new file mode 100644
index 000000000000..7d3ff8ec421d
--- /dev/null
+++ b/libc/utils/hdrgen/tests/input/custom.yaml
@@ -0,0 +1,13 @@
+merge_yaml_files:
+ - custom-common.yaml
+
+header: custom.h
+standards:
+ - acme
+
+functions:
+ - name: runner
+ return_type: road
+ arguments:
+ - type: meep
+ - type: meep
diff --git a/libc/utils/hdrgen/tests/input/sorting.yaml b/libc/utils/hdrgen/tests/input/sorting.yaml
new file mode 100644
index 000000000000..3c26cde9e6c4
--- /dev/null
+++ b/libc/utils/hdrgen/tests/input/sorting.yaml
@@ -0,0 +1,20 @@
+header: sorting.h
+standards:
+ - stdc
+functions:
+ - name: gunk
+ return_type: void
+ arguments:
+ - type: const char *
+ - name: _func_with_aliases
+ return_type: void
+ arguments:
+ - type: int
+ - name: func_with_aliases
+ return_type: void
+ arguments:
+ - type: int
+ - name: __func_with_aliases
+ return_type: void
+ arguments:
+ - type: int
diff --git a/libc/utils/hdrgen/tests/test_integration.py b/libc/utils/hdrgen/tests/test_integration.py
index bf393d26a810..b975d8ff007b 100644
--- a/libc/utils/hdrgen/tests/test_integration.py
+++ b/libc/utils/hdrgen/tests/test_integration.py
@@ -59,6 +59,13 @@ class TestHeaderGenIntegration(unittest.TestCase):
self.run_script(yaml_file, output_file)
self.compare_files(output_file, expected_output_file)
+ def test_custom_license_and_standards(self):
+ yaml_file = self.source_dir / "input" / "custom.yaml"
+ expected_output_file = self.source_dir / "expected_output" / "custom.h"
+ output_file = self.output_dir / "custom.h"
+ self.run_script(yaml_file, output_file)
+ self.compare_files(output_file, expected_output_file)
+
def test_generate_json(self):
yaml_file = self.source_dir / "input/test_small.yaml"
expected_output_file = self.source_dir / "expected_output/test_small.json"
@@ -68,6 +75,13 @@ class TestHeaderGenIntegration(unittest.TestCase):
self.compare_files(output_file, expected_output_file)
+ def test_sorting(self):
+ yaml_file = self.source_dir / "input" / "sorting.yaml"
+ expected_output_file = self.source_dir / "expected_output" / "sorting.h"
+ output_file = self.output_dir / "sorting.h"
+ self.run_script(yaml_file, output_file)
+ self.compare_files(output_file, expected_output_file)
+
def main():
parser = argparse.ArgumentParser(description="TestHeaderGenIntegration arguments")
diff --git a/libcxx/test/libcxx/input.output/iostreams.base/ios.base/ios.base.cons/dtor.uninitialized.pass.cpp b/libcxx/test/libcxx/input.output/iostreams.base/ios.base/ios.base.cons/dtor.uninitialized.pass.cpp
index f17c1483c4a9..16d66e3be14e 100644
--- a/libcxx/test/libcxx/input.output/iostreams.base/ios.base/ios.base.cons/dtor.uninitialized.pass.cpp
+++ b/libcxx/test/libcxx/input.output/iostreams.base/ios.base/ios.base.cons/dtor.uninitialized.pass.cpp
@@ -6,14 +6,12 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
// UNSUPPORTED: no-exceptions
// The fix for issue 57964 requires an updated dylib due to explicit
// instantiations. That means Apple backdeployment targets remain broken.
-// XFAIL: using-built-library-before-llvm-19
+// TODO: Remove && !darwin once availability markup for LLVM 19 on macOS has been added
+// XFAIL: using-built-library-before-llvm-19 && !darwin
// <ios>
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/setbuf.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/setbuf.pass.cpp
index 9d14abcedd42..00aa97a45cc2 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/setbuf.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/setbuf.pass.cpp
@@ -6,16 +6,14 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
// <fstream>
// basic_streambuf<charT, traits>* setbuf(char_type* s, streamsize n) override;
// This test requires the fix to https://llvm.org/PR60509 in the dylib,
// which landed in 5afb937d8a30445642ccaf33866ee4cdd0713222.
-// XFAIL: using-built-library-before-llvm-19
+// TODO: Remove && !darwin once availability markup for LLVM 19 on macOS has been added
+// XFAIL: using-built-library-before-llvm-19 && !darwin
#include <fstream>
#include <cstddef>
diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/sync.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/sync.pass.cpp
index 3b685950d36a..b04d2c07ebb1 100644
--- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/sync.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/sync.pass.cpp
@@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
// <istream>
// int sync();
@@ -16,7 +13,8 @@
// The fix for bug 51497 and bug 51499 require and updated dylib due to
// explicit instantiations. That means Apple backdeployment targets remain
// broken.
-// XFAIL: using-built-library-before-llvm-19
+// TODO: Remove && !darwin once availability markup for LLVM 19 on macOS has been added
+// XFAIL: using-built-library-before-llvm-19 && !darwin
#include <istream>
#include <cassert>
diff --git a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp
index 4905ed40f4a2..8ae6bc2d3ba6 100644
--- a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp
@@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
// Bionic has minimal locale support, investigate this later.
// XFAIL: LIBCXX-ANDROID-FIXME
@@ -56,14 +53,7 @@ int main(int, char**)
ASSERT_COMPARE(std::string, "AAA", "BBB", -1);
ASSERT_COMPARE(std::string, "bbb", "aaa", 1);
ASSERT_COMPARE(std::string, "ccc", "ccc", 0);
-
-#if defined(__APPLE__)
- // Apple's default collation is case-sensitive
- ASSERT_COMPARE(std::string, "aaaaaaA", "BaaaaaA", 1);
-#else
- // Glibc, Windows, and FreeBSD's default collation is case-insensitive
ASSERT_COMPARE(std::string, "aaaaaaA", "BaaaaaA", -1);
-#endif
}
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
{
@@ -73,13 +63,7 @@ int main(int, char**)
ASSERT_COMPARE(std::wstring, L"AAA", L"BBB", -1);
ASSERT_COMPARE(std::wstring, L"bbb", L"aaa", 1);
ASSERT_COMPARE(std::wstring, L"ccc", L"ccc", 0);
-#if defined(__APPLE__)
- // Apple's default collation is case-sensitive
- ASSERT_COMPARE(std::wstring, L"aaaaaaA", L"BaaaaaA", 1);
-#else
- // Glibc, Windows, and FreeBSD's default collation is case-insensitive
ASSERT_COMPARE(std::wstring, L"aaaaaaA", L"BaaaaaA", -1);
-#endif
}
#endif
}
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp
index ea6b07934510..c9ed59f3cb9a 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp
@@ -6,11 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
-// XFAIL: darwin
-
// NetBSD does not support LC_MONETARY at the moment
// XFAIL: netbsd
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp
index f98758d086de..371cf0e90c8d 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp
@@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
// NetBSD does not support LC_MONETARY at the moment
// XFAIL: netbsd
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp
index 6980b7ae77db..c86df7e6b53b 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp
@@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
// NetBSD does not support LC_MONETARY at the moment
// XFAIL: netbsd
@@ -158,7 +155,7 @@ int main(int, char**)
std::noshowbase(ios);
}
{ // negative one, showbase
-#ifdef _AIX
+#if defined(_AIX) || defined(__APPLE__)
std::string v = "-" + currency_symbol + "0.01";
#else
std::string v = currency_symbol + "-0.01";
@@ -172,7 +169,7 @@ int main(int, char**)
assert(ex == -1);
}
{ // negative one, showbase
-#ifdef _AIX
+#if defined(_AIX) || defined(__APPLE__)
std::string v = "-" + currency_symbol + "0.01";
#else
std::string v = currency_symbol + "-0.01";
@@ -212,7 +209,7 @@ int main(int, char**)
std::noshowbase(ios);
}
{ // negative, showbase
-#ifdef _AIX
+#if defined(_AIX) || defined(__APPLE__)
std::string v = "-" + currency_symbol + "1,234,567.89";
#else
std::string v = currency_symbol + "-1,234,567.89";
@@ -333,7 +330,7 @@ int main(int, char**)
std::noshowbase(ios);
}
{ // negative one, showbase
-#if defined(TEST_HAS_GLIBC) || defined(_AIX)
+#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__)
std::string v = "-" + currency_name + "0.01";
#else
std::string v = currency_name + "-0.01";
@@ -348,7 +345,7 @@ int main(int, char**)
assert(ex == -1);
}
{ // negative one, showbase
-#if defined(TEST_HAS_GLIBC) || defined(_AIX)
+#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__)
std::string v = "-" + currency_name + "0.01";
#else
std::string v = currency_name + "-0.01";
@@ -389,7 +386,7 @@ int main(int, char**)
std::noshowbase(ios);
}
{ // negative, showbase
-#if defined(TEST_HAS_GLIBC) || defined(_AIX)
+#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__)
std::string v = "-" + currency_name + "1,234,567.89";
#else
std::string v = currency_name + "-1,234,567.89";
@@ -518,7 +515,7 @@ int main(int, char**)
std::noshowbase(ios);
}
{ // negative one, showbase
-# ifdef _AIX
+# if defined(_AIX) || defined(__APPLE__)
std::wstring v = L"-" + w_currency_symbol + L"0.01";
# else
std::wstring v = w_currency_symbol + L"-0.01";
@@ -532,7 +529,7 @@ int main(int, char**)
assert(ex == -1);
}
{ // negative one, showbase
-# ifdef _AIX
+# if defined(_AIX) || defined(__APPLE__)
std::wstring v = L"-" + w_currency_symbol + L"0.01";
# else
std::wstring v = w_currency_symbol + L"-0.01";
@@ -572,7 +569,7 @@ int main(int, char**)
std::noshowbase(ios);
}
{ // negative, showbase
-# ifdef _AIX
+# if defined(_AIX) || defined(__APPLE__)
std::wstring v = L"-" + w_currency_symbol + L"1,234,567.89";
# else
std::wstring v = w_currency_symbol + L"-1,234,567.89";
@@ -693,7 +690,7 @@ int main(int, char**)
std::noshowbase(ios);
}
{ // negative one, showbase
-# if defined(TEST_HAS_GLIBC) || defined(_AIX)
+# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__)
std::wstring v = L"-" + w_currency_name + L"0.01";
# else
std::wstring v = w_currency_name + L"-0.01";
@@ -707,7 +704,7 @@ int main(int, char**)
assert(ex == -1);
}
{ // negative one, showbase
-# if defined(TEST_HAS_GLIBC) || defined(_AIX)
+# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__)
std::wstring v = L"-" + w_currency_name + L"0.01";
# else
std::wstring v = w_currency_name + L"-0.01";
@@ -747,7 +744,7 @@ int main(int, char**)
std::noshowbase(ios);
}
{ // negative, showbase
-# if defined(TEST_HAS_GLIBC) || defined(_AIX)
+# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__)
std::wstring v = L"-" + w_currency_name + L"1,234,567.89";
# else
std::wstring v = w_currency_name + L"-1,234,567.89";
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp
index 14745996b9fd..f9d7998b07ff 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp
@@ -6,11 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
-// XFAIL: darwin
-
// NetBSD does not support LC_MONETARY at the moment
// XFAIL: netbsd
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp
index 0455e5949c44..be1e39748846 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp
@@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
// NetBSD does not support LC_MONETARY at the moment
// XFAIL: netbsd
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp
index 68640fabb73b..25046a841708 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp
@@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
// NetBSD does not support LC_MONETARY at the moment
// XFAIL: netbsd
@@ -122,7 +119,7 @@ int main(int, char**)
char str[100];
cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), false, ios, '*', v);
std::string ex(str, base(iter));
-#ifdef _AIX
+#if defined(_AIX) || defined(__APPLE__)
assert(ex == "-" + currency_symbol + "0.01");
#else
assert(ex == currency_symbol + "-0.01");
@@ -142,7 +139,7 @@ int main(int, char**)
char str[100];
cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), false, ios, '*', v);
std::string ex(str, base(iter));
-#ifdef _AIX
+#if defined(_AIX) || defined(__APPLE__)
assert(ex == "-" + currency_symbol + "1,234,567.89");
#else
assert(ex == currency_symbol + "-1,234,567.89");
@@ -156,7 +153,7 @@ int main(int, char**)
char str[100];
cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), false, ios, ' ', v);
std::string ex(str, base(iter));
-#ifdef _AIX
+#if defined(_AIX) || defined(__APPLE__)
assert(ex == "-" + currency_symbol + "1,234,567.89" + currency_symbol_padding);
#else
assert(ex == currency_symbol + "-1,234,567.89" + currency_symbol_padding);
@@ -171,7 +168,7 @@ int main(int, char**)
char str[100];
cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), false, ios, ' ', v);
std::string ex(str, base(iter));
-#ifdef _AIX
+#if defined(_AIX) || defined(__APPLE__)
assert(ex == "-" + currency_symbol + currency_symbol_padding + "1,234,567.89");
#else
assert(ex == currency_symbol + "-" + currency_symbol_padding + "1,234,567.89");
@@ -186,7 +183,7 @@ int main(int, char**)
char str[100];
cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), false, ios, ' ', v);
std::string ex(str, base(iter));
-#ifdef _AIX
+#if defined(_AIX) || defined(__APPLE__)
assert(ex == currency_symbol_padding + "-" + currency_symbol + "1,234,567.89");
#else
assert(ex == currency_symbol_padding + currency_symbol + "-1,234,567.89");
@@ -239,7 +236,7 @@ int main(int, char**)
char str[100];
cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), true, ios, '*', v);
std::string ex(str, base(iter));
-#if defined(TEST_HAS_GLIBC) || defined(_AIX)
+#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__)
assert(ex == "-" + currency_name + "0.01");
#else
assert(ex == currency_name + "-0.01");
@@ -259,7 +256,7 @@ int main(int, char**)
char str[100];
cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), true, ios, '*', v);
std::string ex(str, base(iter));
-#if defined(TEST_HAS_GLIBC) || defined(_AIX)
+#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__)
assert(ex == "-" + currency_name + "1,234,567.89");
#else
assert(ex == currency_name + "-1,234,567.89");
@@ -273,7 +270,7 @@ int main(int, char**)
char str[100];
cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), true, ios, ' ', v);
std::string ex(str, base(iter));
-#if defined(TEST_HAS_GLIBC) || defined(_AIX)
+#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__)
assert(ex == "-" + currency_name + "1,234,567.89" + currency_name_padding);
#else
assert(ex == currency_name + "-1,234,567.89" + currency_name_padding);
@@ -288,7 +285,7 @@ int main(int, char**)
char str[100];
cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), true, ios, ' ', v);
std::string ex(str, base(iter));
-#if defined(TEST_HAS_GLIBC) || defined(_AIX)
+#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__)
assert(ex == "-" + currency_name + currency_name_padding + "1,234,567.89");
#else
assert(ex == currency_name + "-" + currency_name_padding + "1,234,567.89");
@@ -303,7 +300,7 @@ int main(int, char**)
char str[100];
cpp17_output_iterator<char*> iter = f.put(cpp17_output_iterator<char*>(str), true, ios, ' ', v);
std::string ex(str, base(iter));
-#if defined(TEST_HAS_GLIBC) || defined(_AIX)
+#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__)
assert(ex == currency_name_padding + "-" + currency_name + "1,234,567.89");
#else
assert(ex == currency_name_padding + currency_name + "-1,234,567.89");
@@ -366,7 +363,7 @@ int main(int, char**)
wchar_t str[100];
cpp17_output_iterator<wchar_t*> iter = f.put(cpp17_output_iterator<wchar_t*>(str), false, ios, '*', v);
std::wstring ex(str, base(iter));
-# ifdef _AIX
+# if defined(_AIX) || defined(__APPLE__)
assert(ex == L"-" + currency_symbol + L"0.01");
# else
assert(ex == currency_symbol + L"-0.01");
@@ -386,7 +383,7 @@ int main(int, char**)
wchar_t str[100];
cpp17_output_iterator<wchar_t*> iter = f.put(cpp17_output_iterator<wchar_t*>(str), false, ios, '*', v);
std::wstring ex(str, base(iter));
-# ifdef _AIX
+# if defined(_AIX) || defined(__APPLE__)
assert(ex == L"-" + currency_symbol + L"1,234,567.89");
# else
assert(ex == currency_symbol + L"-1,234,567.89");
@@ -400,7 +397,7 @@ int main(int, char**)
wchar_t str[100];
cpp17_output_iterator<wchar_t*> iter = f.put(cpp17_output_iterator<wchar_t*>(str), false, ios, ' ', v);
std::wstring ex(str, base(iter));
-# ifdef _AIX
+# if defined(_AIX) || defined(__APPLE__)
assert(ex == L"-" + currency_symbol + L"1,234,567.89 ");
# else
assert(ex == currency_symbol + L"-1,234,567.89 ");
@@ -415,7 +412,7 @@ int main(int, char**)
wchar_t str[100];
cpp17_output_iterator<wchar_t*> iter = f.put(cpp17_output_iterator<wchar_t*>(str), false, ios, ' ', v);
std::wstring ex(str, base(iter));
-# ifdef _AIX
+# if defined(_AIX) || defined(__APPLE__)
assert(ex == L"-" + currency_symbol + L" 1,234,567.89");
# else
assert(ex == currency_symbol + L"- 1,234,567.89");
@@ -430,7 +427,7 @@ int main(int, char**)
wchar_t str[100];
cpp17_output_iterator<wchar_t*> iter = f.put(cpp17_output_iterator<wchar_t*>(str), false, ios, ' ', v);
std::wstring ex(str, base(iter));
-# ifdef _AIX
+# if defined(_AIX) || defined(__APPLE__)
assert(ex == L" -" + currency_symbol + L"1,234,567.89");
# else
assert(ex == L" " + currency_symbol + L"-1,234,567.89");
@@ -483,7 +480,7 @@ int main(int, char**)
wchar_t str[100];
cpp17_output_iterator<wchar_t*> iter = f.put(cpp17_output_iterator<wchar_t*>(str), true, ios, '*', v);
std::wstring ex(str, base(iter));
-# if defined(TEST_HAS_GLIBC) || defined(_AIX)
+# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__)
assert(ex == L"-" + currency_name + L"0.01");
#else
assert(ex == currency_name + L"-0.01");
@@ -503,7 +500,7 @@ int main(int, char**)
wchar_t str[100];
cpp17_output_iterator<wchar_t*> iter = f.put(cpp17_output_iterator<wchar_t*>(str), true, ios, '*', v);
std::wstring ex(str, base(iter));
-# if defined(TEST_HAS_GLIBC) || defined(_AIX)
+# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__)
assert(ex == L"-" + currency_name + L"1,234,567.89");
#else
assert(ex == currency_name + L"-1,234,567.89");
@@ -517,7 +514,7 @@ int main(int, char**)
wchar_t str[100];
cpp17_output_iterator<wchar_t*> iter = f.put(cpp17_output_iterator<wchar_t*>(str), true, ios, ' ', v);
std::wstring ex(str, base(iter));
-# if defined(TEST_HAS_GLIBC) || defined(_AIX)
+# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__)
assert(ex == L"-" + currency_name + L"1,234,567.89" + currency_name_padding);
#else
assert(ex == currency_name + L"-1,234,567.89" + currency_name_padding);
@@ -532,7 +529,7 @@ int main(int, char**)
wchar_t str[100];
cpp17_output_iterator<wchar_t*> iter = f.put(cpp17_output_iterator<wchar_t*>(str), true, ios, ' ', v);
std::wstring ex(str, base(iter));
-# if defined(TEST_HAS_GLIBC) || defined(_AIX)
+# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__)
assert(ex == L"-" + currency_name + currency_name_padding + L"1,234,567.89");
#else
assert(ex == currency_name + L"-" + currency_name_padding + L"1,234,567.89");
@@ -547,7 +544,7 @@ int main(int, char**)
wchar_t str[100];
cpp17_output_iterator<wchar_t*> iter = f.put(cpp17_output_iterator<wchar_t*>(str), true, ios, ' ', v);
std::wstring ex(str, base(iter));
-# if defined(TEST_HAS_GLIBC) || defined(_AIX)
+# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__)
assert(ex == currency_name_padding + L"-" + currency_name + L"1,234,567.89");
#else
assert(ex == currency_name_padding + currency_name + L"-1,234,567.89");
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/curr_symbol.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/curr_symbol.pass.cpp
index 9c1253d47acd..e7f0f29e8774 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/curr_symbol.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/curr_symbol.pass.cpp
@@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
// NetBSD does not support LC_MONETARY at the moment
// XFAIL: netbsd
@@ -117,11 +114,7 @@ int main(int, char**)
{
Fnf f(LOCALE_fr_FR_UTF_8, 1);
-#ifdef __APPLE__
- assert(f.curr_symbol() == " Eu");
-#else
assert(f.curr_symbol() == " \u20ac");
-#endif
}
{
Fnt f(LOCALE_fr_FR_UTF_8, 1);
@@ -130,11 +123,7 @@ int main(int, char**)
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
{
Fwf f(LOCALE_fr_FR_UTF_8, 1);
-#ifdef __APPLE__
- assert(f.curr_symbol() == L" Eu");
-#else
assert(f.curr_symbol() == L" \u20ac");
-#endif
}
{
Fwt f(LOCALE_fr_FR_UTF_8, 1);
@@ -164,7 +153,7 @@ int main(int, char**)
{
Fnf f(LOCALE_zh_CN_UTF_8, 1);
-#ifdef _WIN32
+#if defined(_WIN32) || defined(__APPLE__)
assert(f.curr_symbol() == "\xC2\xA5"); // \u00A5
#else
assert(f.curr_symbol() == "\xEF\xBF\xA5"); // \uFFE5
@@ -177,7 +166,7 @@ int main(int, char**)
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
{
Fwf f(LOCALE_zh_CN_UTF_8, 1);
-#ifdef _WIN32
+#if defined(_WIN32) || defined(__APPLE__)
assert(f.curr_symbol() == L"\u00A5");
#else
assert(f.curr_symbol() == L"\uFFE5");
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/grouping.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/grouping.pass.cpp
index 630b2739c88a..90dc6c4d7a2a 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/grouping.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/grouping.pass.cpp
@@ -6,11 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
-// XFAIL: darwin
-//
// NetBSD does not support LC_MONETARY at the moment
// XFAIL: netbsd
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/neg_format.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/neg_format.pass.cpp
index a3e3d853524b..e9528147dfe6 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/neg_format.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/neg_format.pass.cpp
@@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
// NetBSD does not support LC_MONETARY at the moment
// XFAIL: netbsd
@@ -82,14 +79,6 @@ void assert_sign_symbol_none_value(std::money_base::pattern p)
assert(p.field[3] == std::money_base::value);
}
-void assert_value_none_symbol_sign(std::money_base::pattern p)
-{
- assert(p.field[0] == std::money_base::value);
- assert(p.field[1] == std::money_base::none);
- assert(p.field[2] == std::money_base::symbol);
- assert(p.field[3] == std::money_base::sign);
-}
-
void assert_sign_value_none_symbol(std::money_base::pattern p)
{
assert(p.field[0] == std::money_base::sign);
@@ -149,39 +138,23 @@ int main(int, char**)
{
Fnf f(LOCALE_fr_FR_UTF_8, 1);
std::money_base::pattern p = f.neg_format();
-#ifdef __APPLE__
- assert_value_none_symbol_sign(p);
-#else
assert_sign_value_none_symbol(p);
-#endif
}
{
Fnt f(LOCALE_fr_FR_UTF_8, 1);
std::money_base::pattern p = f.neg_format();
-#ifdef __APPLE__
- assert_value_none_symbol_sign(p);
-#else
assert_sign_value_none_symbol(p);
-#endif
}
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
{
Fwf f(LOCALE_fr_FR_UTF_8, 1);
std::money_base::pattern p = f.neg_format();
-#ifdef __APPLE__
- assert_value_none_symbol_sign(p);
-#else
assert_sign_value_none_symbol(p);
-#endif
}
{
Fwt f(LOCALE_fr_FR_UTF_8, 1);
std::money_base::pattern p = f.neg_format();
-#ifdef __APPLE__
- assert_value_none_symbol_sign(p);
-#else
assert_sign_value_none_symbol(p);
-#endif
}
#endif // TEST_HAS_NO_WIDE_CHARACTERS
@@ -211,7 +184,7 @@ int main(int, char**)
{
Fnf f(LOCALE_zh_CN_UTF_8, 1);
std::money_base::pattern p = f.neg_format();
-#ifdef _AIX
+#if defined(_AIX) || defined(__APPLE__)
assert_sign_symbol_none_value(p);
#else
assert_symbol_sign_none_value(p);
@@ -220,7 +193,7 @@ int main(int, char**)
{
Fnt f(LOCALE_zh_CN_UTF_8, 1);
std::money_base::pattern p = f.neg_format();
-#if defined(_WIN32) || defined(__APPLE__)
+#if defined(_WIN32)
assert_symbol_sign_none_value(p);
#else
assert_sign_symbol_none_value(p);
@@ -230,7 +203,7 @@ int main(int, char**)
{
Fwf f(LOCALE_zh_CN_UTF_8, 1);
std::money_base::pattern p = f.neg_format();
-#ifdef _AIX
+#if defined(_AIX) || defined(__APPLE__)
assert_sign_symbol_none_value(p);
#else
assert_symbol_sign_none_value(p);
@@ -239,7 +212,7 @@ int main(int, char**)
{
Fwt f(LOCALE_zh_CN_UTF_8, 1);
std::money_base::pattern p = f.neg_format();
-#if defined(_WIN32) || defined(__APPLE__)
+#if defined(_WIN32)
assert_symbol_sign_none_value(p);
#else
assert_sign_symbol_none_value(p);
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/pos_format.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/pos_format.pass.cpp
index 671620a0c2f9..11832a7d8927 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/pos_format.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/pos_format.pass.cpp
@@ -5,7 +5,7 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
+
// NetBSD does not support LC_MONETARY at the moment
// XFAIL: netbsd
@@ -79,14 +79,6 @@ void assert_sign_symbol_none_value(std::money_base::pattern p)
assert(p.field[3] == std::money_base::value);
}
-void assert_value_none_symbol_sign(std::money_base::pattern p)
-{
- assert(p.field[0] == std::money_base::value);
- assert(p.field[1] == std::money_base::none);
- assert(p.field[2] == std::money_base::symbol);
- assert(p.field[3] == std::money_base::sign);
-}
-
void assert_sign_value_none_symbol(std::money_base::pattern p)
{
assert(p.field[0] == std::money_base::sign);
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp
index 612d3738a373..31682fea43bc 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp
@@ -6,12 +6,10 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
// The fix for LWG2381 (https://github.com/llvm/llvm-project/pull/77948) changed behavior of
// FP parsing. This requires 3e15c97fa3812993bdc319827a5c6d867b765ae8 in the dylib.
-// XFAIL: using-built-library-before-llvm-19
+// TODO: Remove && !darwin once availability markup for LLVM 19 on macOS has been added
+// XFAIL: using-built-library-before-llvm-19 && !darwin
// <locale>
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp
index 58bc9e5abef8..57eedc8633be 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp
@@ -6,12 +6,10 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
// The fix for LWG2381 (https://github.com/llvm/llvm-project/pull/77948) changed behavior of
// FP parsing. This requires 3e15c97fa3812993bdc319827a5c6d867b765ae8 in the dylib.
-// XFAIL: using-built-library-before-llvm-19
+// TODO: Remove && !darwin once availability markup for LLVM 19 on macOS has been added
+// XFAIL: using-built-library-before-llvm-19 && !darwin
// <locale>
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp
index bf8bb651d6bc..8324ee317014 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp
@@ -6,12 +6,10 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
// The fix for LWG2381 (https://github.com/llvm/llvm-project/pull/77948) changed behavior of
// FP parsing. This requires 3e15c97fa3812993bdc319827a5c6d867b765ae8 in the dylib.
-// XFAIL: using-built-library-before-llvm-19
+// TODO: Remove && !darwin once availability markup for LLVM 19 on macOS has been added
+// XFAIL: using-built-library-before-llvm-19 && !darwin
// <locale>
diff --git a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/grouping.pass.cpp b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/grouping.pass.cpp
index a87c5e0ace28..11ec75469c70 100644
--- a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/grouping.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/grouping.pass.cpp
@@ -5,10 +5,7 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
+//
// NetBSD does not support LC_NUMERIC at the moment
// XFAIL: netbsd
@@ -63,7 +60,7 @@ int main(int, char**)
}
{
std::locale l(LOCALE_fr_FR_UTF_8);
-#if defined(TEST_HAS_GLIBC) || defined(_WIN32) || defined(_AIX)
+#if defined(TEST_HAS_GLIBC) || defined(_WIN32) || defined(_AIX) || defined(__APPLE__)
const char* const group = "\3";
#else
const char* const group = "\x7f";
diff --git a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp
index ef39e8aa7b68..53f2c8554f3d 100644
--- a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp
@@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
// NetBSD does not support LC_NUMERIC at the moment
// XFAIL: netbsd
@@ -69,7 +66,7 @@ int main(int, char**)
// The below tests work around GLIBC's use of U202F as LC_NUMERIC thousands_sep.
std::locale l(LOCALE_fr_FR_UTF_8);
{
-#if defined(_CS_GNU_LIBC_VERSION) || defined(_WIN32) || defined(_AIX)
+#if defined(_CS_GNU_LIBC_VERSION) || defined(_WIN32) || defined(_AIX) || defined(__APPLE__)
const char sep = ' ';
#else
const char sep = ',';
diff --git a/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp b/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp
index 4e84db9a84d7..97ac04275b0b 100644
--- a/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp
+++ b/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp
@@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
// UNSUPPORTED: c++03, c++11, c++14, c++17
// UNSUPPORTED: no-localization
// UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
@@ -83,17 +80,10 @@ static void test_values() {
assert(stream_c_locale<CharT>(1'000.123456s) == SV("1000.1235s"));
if constexpr (std::same_as<CharT, char>) {
-#if defined(__APPLE__)
- assert(stream_fr_FR_locale<CharT>(-1'000'000s) == SV("-1000000s"));
- assert(stream_fr_FR_locale<CharT>(1'000'000s) == SV("1000000s"));
- assert(stream_fr_FR_locale<CharT>(-1'000.123456s) == SV("-1000,1235s"));
- assert(stream_fr_FR_locale<CharT>(1'000.123456s) == SV("1000,1235s"));
-#else
assert(stream_fr_FR_locale<CharT>(-1'000'000s) == SV("-1 000 000s"));
assert(stream_fr_FR_locale<CharT>(1'000'000s) == SV("1 000 000s"));
assert(stream_fr_FR_locale<CharT>(-1'000.123456s) == SV("-1 000,1235s"));
assert(stream_fr_FR_locale<CharT>(1'000.123456s) == SV("1 000,1235s"));
-#endif
} else {
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
assert(stream_fr_FR_locale<CharT>(-1'000'000s) == L"-1" FR_THOU_SEP "000" FR_THOU_SEP "000s");
diff --git a/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp b/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp
index 973bce8f81d4..f1f7debed246 100644
--- a/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp
+++ b/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp
@@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
// UNSUPPORTED: c++03, c++11, c++14, c++17
// UNSUPPORTED: no-localization
// UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
@@ -408,19 +405,11 @@ static void test_valid_positive_integral_values() {
"%OM='00'\t"
"%S='00'\t"
"%OS='00'\t"
-# if defined(__APPLE__)
- "%p='AM'\t"
-# else
"%p='午前'\t"
-# endif
"%R='00:00'\t"
"%T='00:00:00'\t"
# if defined(__APPLE__) || defined(__FreeBSD__)
-# if defined(__APPLE__)
- "%r='12:00:00 AM'\t"
-# else
"%r='12:00:00 午前'\t"
-# endif
"%X='00時00分00秒'\t"
"%EX='00時00分00秒'\t"
# elif defined(_WIN32)
@@ -448,19 +437,11 @@ static void test_valid_positive_integral_values() {
"%OM='59'\t"
"%S='59'\t"
"%OS='59'\t"
-# if defined(__APPLE__)
- "%p='AM'\t"
-# else
"%p='午前'\t"
-# endif
"%R='11:59'\t"
"%T='11:59:59'\t"
# if defined(__APPLE__) || defined(__FreeBSD__)
-# if defined(__APPLE__)
- "%r='11:59:59 AM'\t"
-# else
"%r='11:59:59 午前'\t"
-# endif
"%X='11時59分59秒'\t"
"%EX='11時59分59秒'\t"
# elif defined(_WIN32)
@@ -488,19 +469,11 @@ static void test_valid_positive_integral_values() {
"%OM='00'\t"
"%S='00'\t"
"%OS='00'\t"
-# if defined(__APPLE__)
- "%p='PM'\t"
-# else
"%p='午後'\t"
-# endif
"%R='12:00'\t"
"%T='12:00:00'\t"
# if defined(__APPLE__) || defined(__FreeBSD__)
-# if defined(__APPLE__)
- "%r='12:00:00 PM'\t"
-# else
"%r='12:00:00 午後'\t"
-# endif
"%X='12時00分00秒'\t"
"%EX='12時00分00秒'\t"
# else
@@ -528,19 +501,11 @@ static void test_valid_positive_integral_values() {
"%OM='59'\t"
"%S='59'\t"
"%OS='59'\t"
-# if defined(__APPLE__)
- "%p='PM'\t"
-# else
"%p='午後'\t"
-# endif
"%R='23:59'\t"
"%T='23:59:59'\t"
# if defined(__APPLE__) || defined(__FreeBSD__)
-# if defined(__APPLE__)
- "%r='11:59:59 PM'\t"
-# else
"%r='11:59:59 午後'\t"
-# endif
"%X='23時59分59秒'\t"
"%EX='23時59分59秒'\t"
# else
@@ -568,19 +533,11 @@ static void test_valid_positive_integral_values() {
"%OM='00'\t"
"%S='00'\t"
"%OS='00'\t"
-# if defined(__APPLE__)
- "%p='AM'\t"
-# else
"%p='午前'\t"
-# endif
"%R='00:00'\t"
"%T='00:00:00'\t"
# if defined(__APPLE__) || defined(__FreeBSD__)
-# if defined(__APPLE__)
- "%r='12:00:00 AM'\t"
-# else
"%r='12:00:00 午前'\t"
-# endif
"%X='00時00分00秒'\t"
"%EX='00時00分00秒'\t"
# elif defined(_WIN32)
@@ -835,19 +792,11 @@ static void test_valid_negative_integral_values() {
"%OM='59'\t"
"%S='59'\t"
"%OS='59'\t"
-# if defined(__APPLE__)
- "%p='PM'\t"
-# else
"%p='午後'\t"
-# endif
"%R='23:59'\t"
"%T='23:59:59'\t"
# if defined(__APPLE__) || defined(__FreeBSD__)
-# if defined(__APPLE__)
- "%r='11:59:59 PM'\t"
-# else
"%r='11:59:59 午後'\t"
-# endif
"%X='23時59分59秒'\t"
"%EX='23時59分59秒'\t"
# elif defined(_WIN32)
diff --git a/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp b/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp
index 28a972b19dce..e258c4161eda 100644
--- a/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp
+++ b/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp
@@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
// UNSUPPORTED: c++03, c++11, c++14, c++17
// UNSUPPORTED: no-localization
// UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
@@ -695,19 +692,11 @@ static void test_valid_values_time() {
"%OM='00'\t"
"%S='00'\t"
"%OS='00'\t"
-# if defined(__APPLE__)
- "%p='AM'\t"
-# else
"%p='午前'\t"
-# endif
"%R='00:00'\t"
"%T='00:00:00'\t"
# if defined(__APPLE__) || defined(__FreeBSD__)
-# if defined(__APPLE__)
- "%r='12:00:00 AM'\t"
-# else
"%r='12:00:00 午前'\t"
-# endif
"%X='00時00分00秒'\t"
"%EX='00時00分00秒'\t"
# elif defined(_WIN32)
@@ -732,19 +721,11 @@ static void test_valid_values_time() {
"%OM='31'\t"
"%S='30.123'\t"
"%OS='30.123'\t"
-# if defined(__APPLE__)
- "%p='PM'\t"
-# else
"%p='午後'\t"
-# endif
"%R='23:31'\t"
"%T='23:31:30.123'\t"
# if defined(__APPLE__) || defined(__FreeBSD__)
-# if defined(__APPLE__)
- "%r='11:31:30 PM'\t"
-# else
"%r='11:31:30 午後'\t"
-# endif
"%X='23時31分30秒'\t"
"%EX='23時31分30秒'\t"
# elif defined(_WIN32)
diff --git a/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp b/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp
index 82d9b4c7540a..bbd9c074bef2 100644
--- a/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp
+++ b/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp
@@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
// UNSUPPORTED: c++03, c++11, c++14, c++17
// UNSUPPORTED: no-localization
// UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
@@ -302,19 +299,11 @@ static void test_valid_values() {
"%OM='00'\t"
"%S='00'\t"
"%OS='00'\t"
-# if defined(__APPLE__)
- "%p='AM'\t"
-# else
"%p='午前'\t"
-# endif
"%R='00:00'\t"
"%T='00:00:00'\t"
# if defined(__APPLE__) || defined(__FreeBSD__)
-# if defined(__APPLE__)
- "%r='12:00:00 AM'\t"
-# else
"%r='12:00:00 午前'\t"
-# endif
"%X='00時00分00秒'\t"
"%EX='00時00分00秒'\t"
# elif defined(_WIN32)
@@ -339,19 +328,11 @@ static void test_valid_values() {
"%OM='31'\t"
"%S='30.123'\t"
"%OS='30.123'\t"
-# if defined(__APPLE__)
- "%p='PM'\t"
-# else
"%p='午後'\t"
-# endif
"%R='23:31'\t"
"%T='23:31:30.123'\t"
# if defined(__APPLE__) || defined(__FreeBSD__)
-# if defined(__APPLE__)
- "%r='11:31:30 PM'\t"
-# else
"%r='11:31:30 午後'\t"
-# endif
"%X='23時31分30秒'\t"
"%EX='23時31分30秒'\t"
# elif defined(_WIN32)
@@ -376,19 +357,11 @@ static void test_valid_values() {
"%OM='02'\t"
"%S='01.123456789012'\t"
"%OS='01.123456789012'\t"
-# if defined(__APPLE__)
- "%p='AM'\t"
-# else
"%p='午前'\t"
-# endif
"%R='03:02'\t"
"%T='03:02:01.123456789012'\t"
# if defined(__APPLE__) || defined(__FreeBSD__)
-# if defined(__APPLE__)
- "%r='03:02:01 AM'\t"
-# else
"%r='03:02:01 午前'\t"
-# endif
"%X='03時02分01秒'\t"
"%EX='03時02分01秒'\t"
# elif defined(_WIN32)
@@ -413,19 +386,11 @@ static void test_valid_values() {
"%OM='01'\t"
"%S='01'\t"
"%OS='01'\t"
-# if defined(__APPLE__)
- "%p='AM'\t"
-# else
"%p='午前'\t"
-# endif
"%R='01:01'\t"
"%T='01:01:01'\t"
# if defined(__APPLE__) || defined(__FreeBSD__)
-# if defined(__APPLE__)
- "%r='01:01:01 AM'\t"
-# else
"%r='01:01:01 午前'\t"
-# endif
"%X='01時01分01秒'\t"
"%EX='01時01分01秒'\t"
# elif defined(_WIN32)
diff --git a/libcxx/test/std/time/time.syn/formatter.local_time.pass.cpp b/libcxx/test/std/time/time.syn/formatter.local_time.pass.cpp
index bd23337ccb31..ce3af8ec199a 100644
--- a/libcxx/test/std/time/time.syn/formatter.local_time.pass.cpp
+++ b/libcxx/test/std/time/time.syn/formatter.local_time.pass.cpp
@@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
// UNSUPPORTED: c++03, c++11, c++14, c++17
// UNSUPPORTED: no-localization
// UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
@@ -694,19 +691,11 @@ static void test_valid_values_time() {
"%OM='00'\t"
"%S='00'\t"
"%OS='00'\t"
-# if defined(__APPLE__)
- "%p='AM'\t"
-# else
"%p='午前'\t"
-# endif
"%R='00:00'\t"
"%T='00:00:00'\t"
# if defined(__APPLE__) || defined(__FreeBSD__)
-# if defined(__APPLE__)
- "%r='12:00:00 AM'\t"
-# else
"%r='12:00:00 午前'\t"
-# endif
"%X='00時00分00秒'\t"
"%EX='00時00分00秒'\t"
# elif defined(_WIN32)
@@ -731,19 +720,11 @@ static void test_valid_values_time() {
"%OM='31'\t"
"%S='30.123'\t"
"%OS='30.123'\t"
-# if defined(__APPLE__)
- "%p='PM'\t"
-# else
"%p='午後'\t"
-# endif
"%R='23:31'\t"
"%T='23:31:30.123'\t"
# if defined(__APPLE__) || defined(__FreeBSD__)
-# if defined(__APPLE__)
- "%r='11:31:30 PM'\t"
-# else
"%r='11:31:30 午後'\t"
-# endif
"%X='23時31分30秒'\t"
"%EX='23時31分30秒'\t"
# elif defined(_WIN32)
diff --git a/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp b/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp
index 9c9c8e0de1e9..9238f3daf1f8 100644
--- a/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp
+++ b/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp
@@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
// UNSUPPORTED: c++03, c++11, c++14, c++17
// UNSUPPORTED: no-localization
// UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
@@ -691,19 +688,11 @@ static void test_valid_values_time() {
"%OM='00'\t"
"%S='00'\t"
"%OS='00'\t"
-# if defined(__APPLE__)
- "%p='AM'\t"
-# else
"%p='午前'\t"
-# endif
"%R='00:00'\t"
"%T='00:00:00'\t"
# if defined(__APPLE__) || defined(__FreeBSD__)
-# if defined(__APPLE__)
- "%r='12:00:00 AM'\t"
-# else
"%r='12:00:00 午前'\t"
-# endif
"%X='00時00分00秒'\t"
"%EX='00時00分00秒'\t"
# elif defined(_WIN32)
@@ -728,19 +717,11 @@ static void test_valid_values_time() {
"%OM='31'\t"
"%S='30.123'\t"
"%OS='30.123'\t"
-# if defined(__APPLE__)
- "%p='PM'\t"
-# else
"%p='午後'\t"
-# endif
"%R='23:31'\t"
"%T='23:31:30.123'\t"
# if defined(__APPLE__) || defined(__FreeBSD__)
-# if defined(__APPLE__)
- "%r='11:31:30 PM'\t"
-# else
"%r='11:31:30 午後'\t"
-# endif
"%X='23時31分30秒'\t"
"%EX='23時31分30秒'\t"
# elif defined(_WIN32)
diff --git a/libcxx/test/support/locale_helpers.h b/libcxx/test/support/locale_helpers.h
index 946c2fed0f3a..3cec7397e3d7 100644
--- a/libcxx/test/support/locale_helpers.h
+++ b/libcxx/test/support/locale_helpers.h
@@ -73,6 +73,12 @@ MultiStringType currency_symbol_ru_RU() {
return MKSTR("\u20BD"); // U+20BD RUBLE SIGN
#elif defined(_WIN32) || defined(__FreeBSD__) || defined(_AIX)
return MKSTR("\u20BD"); // U+20BD RUBLE SIGN
+#elif defined(__APPLE__)
+ if (__builtin_available(macOS 15.4, *)) {
+ return MKSTR("\u20BD"); // U+20BD RUBLE SIGN
+ } else {
+ return MKSTR("\u0440\u0443\u0431.");
+ }
#else
return MKSTR("\u0440\u0443\u0431.");
#endif
@@ -81,6 +87,12 @@ MultiStringType currency_symbol_ru_RU() {
MultiStringType currency_symbol_zh_CN() {
#if defined(_WIN32)
return MKSTR("\u00A5"); // U+00A5 YEN SIGN
+#elif defined(__APPLE__)
+ if (__builtin_available(macOS 15.4, *)) {
+ return MKSTR("\u00A5"); // U+00A5 YEN SIGN
+ } else {
+ return MKSTR("\uFFE5"); // U+FFE5 FULLWIDTH YEN SIGN
+ }
#else
return MKSTR("\uFFE5"); // U+FFE5 FULLWIDTH YEN SIGN
#endif
diff --git a/libcxxabi/test/uncaught_exception.pass.cpp b/libcxxabi/test/uncaught_exception.pass.cpp
index 8e8468c43240..e97732006e11 100644
--- a/libcxxabi/test/uncaught_exception.pass.cpp
+++ b/libcxxabi/test/uncaught_exception.pass.cpp
@@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO(mordante) Investigate
-// UNSUPPORTED: apple-clang
-
// UNSUPPORTED: no-exceptions
// This tests that libc++abi still provides __cxa_uncaught_exception() for
@@ -18,7 +15,8 @@
// to undefined symbols when linking against a libc++ that re-exports the symbols,
// but running against a libc++ that doesn't. Fortunately, usage of __cxa_uncaught_exception()
// in the wild seems to be close to non-existent.
-// XFAIL: using-built-library-before-llvm-19
+// TODO: Remove && !darwin once availability markup for LLVM 19 on macOS has been added
+// XFAIL: using-built-library-before-llvm-19 && !darwin
#include <cxxabi.h>
#include <cassert>
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index bbf4b29a9fda..a4150ebfa165 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -2749,14 +2749,13 @@ RelroPaddingSection::RelroPaddingSection(Ctx &ctx)
: SyntheticSection(ctx, ".relro_padding", SHT_NOBITS, SHF_ALLOC | SHF_WRITE,
1) {}
-RandomizePaddingSection::RandomizePaddingSection(Ctx &ctx, uint64_t size,
- OutputSection *parent)
- : SyntheticSection(ctx, ".randomize_padding", SHT_PROGBITS, SHF_ALLOC, 1),
+PaddingSection::PaddingSection(Ctx &ctx, uint64_t size, OutputSection *parent)
+ : SyntheticSection(ctx, ".padding", SHT_PROGBITS, SHF_ALLOC, 1),
size(size) {
this->parent = parent;
}
-void RandomizePaddingSection::writeTo(uint8_t *buf) {
+void PaddingSection::writeTo(uint8_t *buf) {
std::array<uint8_t, 4> filler = getParent()->getFiller(ctx);
uint8_t *end = buf + size;
for (; buf + 4 <= end; buf += 4)
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index ac3ec63f0a7a..38e68110e4bc 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -779,11 +779,11 @@ public:
void writeTo(uint8_t *buf) override {}
};
-class RandomizePaddingSection final : public SyntheticSection {
+class PaddingSection final : public SyntheticSection {
uint64_t size;
public:
- RandomizePaddingSection(Ctx &ctx, uint64_t size, OutputSection *parent);
+ PaddingSection(Ctx &ctx, uint64_t size, OutputSection *parent);
size_t getSize() const override { return size; }
void writeTo(uint8_t *buf) override;
};
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 4fa80397cbfa..083b4fb1dbd2 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1495,15 +1495,14 @@ static void randomizeSectionPadding(Ctx &ctx) {
if (auto *isd = dyn_cast<InputSectionDescription>(bc)) {
SmallVector<InputSection *, 0> tmp;
if (os->ptLoad != curPtLoad) {
- tmp.push_back(make<RandomizePaddingSection>(
- ctx, g() % ctx.arg.maxPageSize, os));
+ tmp.push_back(
+ make<PaddingSection>(ctx, g() % ctx.arg.maxPageSize, os));
curPtLoad = os->ptLoad;
}
for (InputSection *isec : isd->sections) {
// Probability of inserting padding is 1 in 16.
if (g() % 16 == 0)
- tmp.push_back(
- make<RandomizePaddingSection>(ctx, isec->addralign, os));
+ tmp.push_back(make<PaddingSection>(ctx, isec->addralign, os));
tmp.push_back(isec);
}
isd->sections = std::move(tmp);
diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
index d892c01f0bc7..8f3652172dfd 100644
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
@@ -10,8 +10,8 @@ import string
import subprocess
import signal
import sys
-import threading
import warnings
+import selectors
import time
from typing import (
Any,
@@ -139,35 +139,6 @@ def dump_memory(base_addr, data, num_per_line, outfile):
outfile.write("\n")
-def read_packet(
- f: IO[bytes], trace_file: Optional[IO[str]] = None
-) -> Optional[ProtocolMessage]:
- """Decode a JSON packet that starts with the content length and is
- followed by the JSON bytes from a file 'f'. Returns None on EOF.
- """
- line = f.readline().decode("utf-8")
- if len(line) == 0:
- return None # EOF.
-
- # Watch for line that starts with the prefix
- prefix = "Content-Length: "
- if line.startswith(prefix):
- # Decode length of JSON bytes
- length = int(line[len(prefix) :])
- # Skip empty line
- separator = f.readline().decode()
- if separator != "":
- Exception("malformed DAP content header, unexpected line: " + separator)
- # Read JSON bytes
- json_str = f.read(length).decode()
- if trace_file:
- trace_file.write("from adapter:\n%s\n" % (json_str))
- # Decode the JSON bytes into a python dictionary
- return json.loads(json_str)
-
- raise Exception("unexpected malformed message from lldb-dap: " + line)
-
-
def packet_type_is(packet, packet_type):
return "type" in packet and packet["type"] == packet_type
@@ -199,16 +170,8 @@ class DebugCommunication(object):
self.log_file = log_file
self.send = send
self.recv = recv
-
- # Packets that have been received and processed but have not yet been
- # requested by a test case.
- self._pending_packets: List[Optional[ProtocolMessage]] = []
- # Received packets that have not yet been processed.
- self._recv_packets: List[Optional[ProtocolMessage]] = []
- # Used as a mutex for _recv_packets and for notify when _recv_packets
- # changes.
- self._recv_condition = threading.Condition()
- self._recv_thread = threading.Thread(target=self._read_packet_thread)
+ self.selector = selectors.DefaultSelector()
+ self.selector.register(recv, selectors.EVENT_READ)
# session state
self.init_commands = init_commands
@@ -234,9 +197,6 @@ class DebugCommunication(object):
# keyed by breakpoint id
self.resolved_breakpoints: dict[str, Breakpoint] = {}
- # trigger enqueue thread
- self._recv_thread.start()
-
@classmethod
def encode_content(cls, s: str) -> bytes:
return ("Content-Length: %u\r\n\r\n%s" % (len(s), s)).encode("utf-8")
@@ -252,17 +212,46 @@ class DebugCommunication(object):
f"seq mismatch in response {command['seq']} != {response['request_seq']}"
)
- def _read_packet_thread(self):
- try:
- while True:
- packet = read_packet(self.recv, trace_file=self.trace_file)
- # `packet` will be `None` on EOF. We want to pass it down to
- # handle_recv_packet anyway so the main thread can handle unexpected
- # termination of lldb-dap and stop waiting for new packets.
- if not self._handle_recv_packet(packet):
- break
- finally:
- dump_dap_log(self.log_file)
+ def _read_packet(
+ self,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> Optional[ProtocolMessage]:
+ """Decode a JSON packet that starts with the content length and is
+ followed by the JSON bytes from self.recv. Returns None on EOF.
+ """
+
+ ready = self.selector.select(timeout)
+ if not ready:
+ warnings.warn(
+ "timeout occurred waiting for a packet, check if the test has a"
+ " negative assertion and see if it can be inverted.",
+ stacklevel=4,
+ )
+ return None # timeout
+
+ line = self.recv.readline().decode("utf-8")
+ if len(line) == 0:
+ return None # EOF.
+
+ # Watch for line that starts with the prefix
+ prefix = "Content-Length: "
+ if line.startswith(prefix):
+ # Decode length of JSON bytes
+ length = int(line[len(prefix) :])
+ # Skip empty line
+ separator = self.recv.readline().decode()
+ if separator != "":
+ Exception("malformed DAP content header, unexpected line: " + separator)
+ # Read JSON bytes
+ json_str = self.recv.read(length).decode()
+ if self.trace_file:
+ self.trace_file.write(
+ "%s from adapter:\n%s\n" % (time.time(), json_str)
+ )
+ # Decode the JSON bytes into a python dictionary
+ return json.loads(json_str)
+
+ raise Exception("unexpected malformed message from lldb-dap: " + line)
def get_modules(
self, start_module: Optional[int] = None, module_count: Optional[int] = None
@@ -310,34 +299,6 @@ class DebugCommunication(object):
output += self.get_output(category, clear=clear)
return output
- def _enqueue_recv_packet(self, packet: Optional[ProtocolMessage]):
- with self.recv_condition:
- self.recv_packets.append(packet)
- self.recv_condition.notify()
-
- def _handle_recv_packet(self, packet: Optional[ProtocolMessage]) -> bool:
- """Handles an incoming packet.
-
- Called by the read thread that is waiting for all incoming packets
- to store the incoming packet in "self._recv_packets" in a thread safe
- way. This function will then signal the "self._recv_condition" to
- indicate a new packet is available.
-
- Args:
- packet: A new packet to store.
-
- Returns:
- True if the caller should keep calling this function for more
- packets.
- """
- with self._recv_condition:
- self._recv_packets.append(packet)
- self._recv_condition.notify()
- # packet is None on EOF
- return packet is not None and not (
- packet["type"] == "response" and packet["command"] == "disconnect"
- )
-
def _recv_packet(
self,
*,
@@ -361,46 +322,34 @@ class DebugCommunication(object):
The first matching packet for the given predicate, if specified,
otherwise None.
"""
- assert (
- threading.current_thread != self._recv_thread
- ), "Must not be called from the _recv_thread"
-
- def process_until_match():
- self._process_recv_packets()
- for i, packet in enumerate(self._pending_packets):
- if packet is None:
- # We need to return a truthy value to break out of the
- # wait_for, use `EOFError` as an indicator of EOF.
- return EOFError()
- if predicate and predicate(packet):
- self._pending_packets.pop(i)
- return packet
-
- with self._recv_condition:
- packet = self._recv_condition.wait_for(process_until_match, timeout)
- return None if isinstance(packet, EOFError) else packet
-
- def _process_recv_packets(self) -> None:
+ deadline = time.time() + timeout
+
+ while time.time() < deadline:
+ packet = self._read_packet(timeout=deadline - time.time())
+ if packet is None:
+ return None
+ self._process_recv_packet(packet)
+ if not predicate or predicate(packet):
+ return packet
+
+ def _process_recv_packet(self, packet) -> None:
"""Process received packets, updating the session state."""
- with self._recv_condition:
- for packet in self._recv_packets:
- if packet and ("seq" not in packet or packet["seq"] == 0):
- warnings.warn(
- f"received a malformed packet, expected 'seq != 0' for {packet!r}"
- )
- # Handle events that may modify any stateful properties of
- # the DAP session.
- if packet and packet["type"] == "event":
- self._handle_event(packet)
- elif packet and packet["type"] == "request":
- # Handle reverse requests and keep processing.
- self._handle_reverse_request(packet)
- # Move the packet to the pending queue.
- self._pending_packets.append(packet)
- self._recv_packets.clear()
+ if packet and ("seq" not in packet or packet["seq"] == 0):
+ warnings.warn(
+ f"received a malformed packet, expected 'seq != 0' for {packet!r}"
+ )
+ # Handle events that may modify any stateful properties of
+ # the DAP session.
+ if packet and packet["type"] == "event":
+ self._handle_event(packet)
+ elif packet and packet["type"] == "request":
+ # Handle reverse requests and keep processing.
+ self._handle_reverse_request(packet)
def _handle_event(self, packet: Event) -> None:
"""Handle any events that modify debug session state we track."""
+ self.events.append(packet)
+
event = packet["event"]
body: Optional[Dict] = packet.get("body", None)
@@ -453,6 +402,8 @@ class DebugCommunication(object):
self.invalidated_event = packet
elif event == "memory":
self.memory_event = packet
+ elif event == "module":
+ self.module_events.append(packet)
def _handle_reverse_request(self, request: Request) -> None:
if request in self.reverse_requests:
@@ -521,18 +472,14 @@ class DebugCommunication(object):
Returns the seq number of the request.
"""
- # Set the seq for requests.
- if packet["type"] == "request":
- packet["seq"] = self.sequence
- self.sequence += 1
- else:
- packet["seq"] = 0
+ packet["seq"] = self.sequence
+ self.sequence += 1
# Encode our command dictionary as a JSON string
json_str = json.dumps(packet, separators=(",", ":"))
if self.trace_file:
- self.trace_file.write("to adapter:\n%s\n" % (json_str))
+ self.trace_file.write("%s to adapter:\n%s\n" % (time.time(), json_str))
length = len(json_str)
if length > 0:
@@ -913,6 +860,8 @@ class DebugCommunication(object):
if restartArguments:
command_dict["arguments"] = restartArguments
+ # Clear state, the process is about to restart...
+ self._process_continued(True)
response = self._send_recv(command_dict)
# Caller must still call wait_for_stopped.
return response
@@ -1479,8 +1428,10 @@ class DebugCommunication(object):
def terminate(self):
self.send.close()
- if self._recv_thread.is_alive():
- self._recv_thread.join()
+ self.recv.close()
+ self.selector.close()
+ if self.log_file:
+ dump_dap_log(self.log_file)
def request_setInstructionBreakpoints(self, memory_reference=[]):
breakpoints = []
@@ -1577,6 +1528,7 @@ class DebugAdapterServer(DebugCommunication):
stdout=subprocess.PIPE,
stderr=sys.stderr,
env=adapter_env,
+ bufsize=0,
)
if connection is None:
diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py
index 29935bb8046f..405e91fc2dc3 100644
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py
@@ -15,6 +15,8 @@ import base64
# DAP tests as a whole have been flakey on the Windows on Arm bot. See:
# https://github.com/llvm/llvm-project/issues/137660
@skipIf(oslist=["windows"], archs=["aarch64"])
+# The Arm Linux bot needs stable resources before it can run these tests reliably.
+@skipIf(oslist=["linux"], archs=["arm$"])
class DAPTestCaseBase(TestBase):
# set timeout based on whether ASAN was enabled or not. Increase
# timeout by a factor of 10 if ASAN is enabled.
@@ -416,7 +418,7 @@ class DAPTestCaseBase(TestBase):
return self.dap_server.wait_for_stopped()
def continue_to_breakpoint(self, breakpoint_id: str):
- self.continue_to_breakpoints((breakpoint_id))
+ self.continue_to_breakpoints([breakpoint_id])
def continue_to_breakpoints(self, breakpoint_ids):
self.do_continue()
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index 36bc17680f3f..c049829f3721 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -450,6 +450,10 @@ ParsedDWARFTypeAttributes::ParsedDWARFTypeAttributes(const DWARFDIE &die) {
byte_size = form_value.Unsigned();
break;
+ case DW_AT_bit_size:
+ data_bit_size = form_value.Unsigned();
+ break;
+
case DW_AT_alignment:
alignment = form_value.Unsigned();
break;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
index da58f4c14622..f5f707129d67 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
@@ -574,6 +574,7 @@ struct ParsedDWARFTypeAttributes {
lldb_private::plugin::dwarf::DWARFFormValue type;
lldb::LanguageType class_language = lldb::eLanguageTypeUnknown;
std::optional<uint64_t> byte_size;
+ std::optional<uint64_t> data_bit_size;
std::optional<uint64_t> alignment;
size_t calling_convention = llvm::dwarf::DW_CC_normal;
uint32_t bit_stride = 0;
diff --git a/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py b/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py
index eb121ecbfdba..a985ebbced71 100644
--- a/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py
+++ b/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py
@@ -97,6 +97,9 @@ class RegisterCommandsTestCase(TestBase):
@skipIf(oslist=no_match(["linux"]))
def test_aarch64_dynamic_regset_config(self):
"""Test AArch64 Dynamic Register sets configuration."""
+ if not self.isAArch64SVE():
+ self.skipTest("SVE must be present")
+
register_sets = self.setup_register_config_test()
for registerSet in register_sets:
@@ -259,6 +262,8 @@ class RegisterCommandsTestCase(TestBase):
def test_aarch64_dynamic_regset_config_sme_write_za_to_enable(self):
"""Test that ZA and ZT0 (if present) shows as 0s when disabled and
can be enabled by writing to ZA."""
+ if not self.isAArch64SVE():
+ self.skipTest("SVE must be present.")
if not self.isAArch64SME():
self.skipTest("SME must be present.")
@@ -270,6 +275,8 @@ class RegisterCommandsTestCase(TestBase):
def test_aarch64_dynamic_regset_config_sme_write_zt0_to_enable(self):
"""Test that ZA and ZT0 (if present) shows as 0s when disabled and
can be enabled by writing to ZT0."""
+ if not self.isAArch64SVE():
+ self.skipTest("SVE must be present.")
if not self.isAArch64SME():
self.skipTest("SME must be present.")
if not self.isAArch64SME2():
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/optional/TestDataFormatterLibcxxOptionalSimulator.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/optional/TestDataFormatterLibcxxOptionalSimulator.py
index 3fefe87dcad9..7463f8897901 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/optional/TestDataFormatterLibcxxOptionalSimulator.py
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/optional/TestDataFormatterLibcxxOptionalSimulator.py
@@ -53,6 +53,8 @@ for r in range(2):
# causing this test to fail. This was reverted in newer version of clang
# with commit 52a9ba7ca.
@skipIf(compiler="clang", compiler_version=["=", "17"])
+ @skipIf(compiler="clang", compiler_version=["=", "18"])
+ @skipIf(compiler="clang", compiler_version=["=", "19"])
@functools.wraps(LibcxxOptionalDataFormatterSimulatorTestCase._run_test)
def test_method(self, defines=defines):
LibcxxOptionalDataFormatterSimulatorTestCase._run_test(self, defines)
diff --git a/lldb/test/API/lang/cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py b/lldb/test/API/lang/cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py
index d8a729b322fe..2f942da604ff 100644
--- a/lldb/test/API/lang/cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py
+++ b/lldb/test/API/lang/cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py
@@ -9,7 +9,7 @@ class LibCxxInternalsRecognizerTestCase(TestBase):
NO_DEBUG_INFO_TESTCASE = True
@add_test_categories(["libc++"])
- @skipIf(compiler="clang", compiler_version=["<", "19.0"])
+ @skipIf(compiler="clang", compiler_version=["<=", "19.0"])
def test_frame_recognizer(self):
"""Test that implementation details of libc++ are hidden"""
self.build()
diff --git a/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py b/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py
index beab4d6c1f5a..7b78541fb4f8 100644
--- a/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py
+++ b/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py
@@ -81,24 +81,20 @@ class TestDAP_breakpointEvents(lldbdap_testcase.DAPTestCaseBase):
breakpoint["verified"], "expect foo breakpoint to not be verified"
)
- # Flush the breakpoint events.
- self.dap_server.wait_for_breakpoint_events()
-
# Continue to the breakpoint
- self.continue_to_breakpoints(dap_breakpoint_ids)
+ self.continue_to_breakpoint(foo_bp_id)
+ self.continue_to_next_stop() # foo_bp2
+ self.continue_to_breakpoint(main_bp_id)
+ self.continue_to_exit()
- verified_breakpoint_ids = []
- unverified_breakpoint_ids = []
- for breakpoint_event in self.dap_server.wait_for_breakpoint_events():
- breakpoint = breakpoint_event["body"]["breakpoint"]
- id = breakpoint["id"]
- if breakpoint["verified"]:
- verified_breakpoint_ids.append(id)
- else:
- unverified_breakpoint_ids.append(id)
+ bp_events = [e for e in self.dap_server.events if e["event"] == "breakpoint"]
- self.assertIn(main_bp_id, unverified_breakpoint_ids)
- self.assertIn(foo_bp_id, unverified_breakpoint_ids)
+ main_bp_events = [
+ e for e in bp_events if e["body"]["breakpoint"]["id"] == main_bp_id
+ ]
+ foo_bp_events = [
+ e for e in bp_events if e["body"]["breakpoint"]["id"] == foo_bp_id
+ ]
- self.assertIn(main_bp_id, verified_breakpoint_ids)
- self.assertIn(foo_bp_id, verified_breakpoint_ids)
+ self.assertTrue(main_bp_events)
+ self.assertTrue(foo_bp_events)
diff --git a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py
index ca881f1d817c..09b13223e0a7 100644
--- a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py
+++ b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py
@@ -156,6 +156,7 @@ class TestDAP_launch(lldbdap_testcase.DAPTestCaseBase):
self.build_and_launch(
program, debuggerRoot=program_parent_dir, initCommands=commands
)
+ self.continue_to_exit()
output = self.get_console()
self.assertTrue(output and len(output) > 0, "expect console output")
lines = output.splitlines()
@@ -171,7 +172,6 @@ class TestDAP_launch(lldbdap_testcase.DAPTestCaseBase):
% (program_parent_dir, line[len(prefix) :]),
)
self.assertTrue(found, "verified lldb-dap working directory")
- self.continue_to_exit()
def test_sourcePath(self):
"""
diff --git a/lldb/test/API/tools/lldb-dap/module-event/TestDAP_module_event.py b/lldb/test/API/tools/lldb-dap/module-event/TestDAP_module_event.py
index 1f4afabbd161..9d1d17b704f7 100644
--- a/lldb/test/API/tools/lldb-dap/module-event/TestDAP_module_event.py
+++ b/lldb/test/API/tools/lldb-dap/module-event/TestDAP_module_event.py
@@ -1,58 +1,58 @@
-import dap_server
+"""
+Test 'module' events for dynamically loaded libraries.
+"""
+
from lldbsuite.test.decorators import *
from lldbsuite.test.lldbtest import *
-from lldbsuite.test import lldbutil
import lldbdap_testcase
-import re
class TestDAP_module_event(lldbdap_testcase.DAPTestCaseBase):
+ def lookup_module_id(self, name):
+ """Returns the identifier for the first module event starting with the given name."""
+ for event in self.dap_server.module_events:
+ if self.get_dict_value(event, ["body", "module", "name"]).startswith(name):
+ return self.get_dict_value(event, ["body", "module", "id"])
+ self.fail(f"No module events matching name={name}")
+
+ def module_events(self, id):
+ """Finds all module events by identifier."""
+ return [
+ event
+ for event in self.dap_server.module_events
+ if self.get_dict_value(event, ["body", "module", "id"]) == id
+ ]
+
+ def module_reasons(self, events):
+ """Returns the list of 'reason' values from the given events."""
+ return [event["body"]["reason"] for event in events]
+
@skipIfWindows
def test_module_event(self):
+ """
+ Test that module events are fired on target load and when the list of
+ dynamic libraries updates while running.
+ """
program = self.getBuildArtifact("a.out")
self.build_and_launch(program)
+ # We can analyze the order of events after the process exits.
+ self.continue_to_exit()
- source = "main.cpp"
- breakpoint1_line = line_number(source, "// breakpoint 1")
- breakpoint2_line = line_number(source, "// breakpoint 2")
- breakpoint3_line = line_number(source, "// breakpoint 3")
+ a_out_id = self.lookup_module_id("a.out")
+ a_out_events = self.module_events(id=a_out_id)
- breakpoint_ids = self.set_source_breakpoints(
- source, [breakpoint1_line, breakpoint2_line, breakpoint3_line]
+ self.assertIn(
+ "new",
+ self.module_reasons(a_out_events),
+ "Expected a.out to load during the debug session.",
)
- self.continue_to_breakpoints(breakpoint_ids)
-
- # We're now stopped at breakpoint 1 before the dlopen. Flush all the module events.
- event = self.dap_server.wait_for_event(["module"])
- while event is not None:
- event = self.dap_server.wait_for_event(["module"])
-
- # Continue to the second breakpoint, before the dlclose.
- self.continue_to_breakpoints(breakpoint_ids)
-
- # Make sure we got a module event for libother.
- event = self.dap_server.wait_for_event(["module"])
- self.assertIsNotNone(event, "didn't get a module event")
- module_name = event["body"]["module"]["name"]
- module_id = event["body"]["module"]["id"]
- self.assertEqual(event["body"]["reason"], "new")
- self.assertIn("libother", module_name)
-
- # Continue to the third breakpoint, after the dlclose.
- self.continue_to_breakpoints(breakpoint_ids)
-
- # Make sure we got a module event for libother.
- event = self.dap_server.wait_for_event(["module"])
- self.assertIsNotNone(event, "didn't get a module event")
- reason = event["body"]["reason"]
- self.assertEqual(reason, "removed")
- self.assertEqual(event["body"]["module"]["id"], module_id)
-
- # The removed module event should omit everything but the module id and name
- # as they are required fields.
- module_data = event["body"]["module"]
- required_keys = ["id", "name"]
- self.assertListEqual(list(module_data.keys()), required_keys)
- self.assertEqual(module_data["name"], "", "expects empty name.")
- self.continue_to_exit()
+ libother_id = self.lookup_module_id(
+ "libother." # libother.so or libother.dylib based on OS.
+ )
+ libother_events = self.module_events(id=libother_id)
+ self.assertEqual(
+ self.module_reasons(libother_events),
+ ["new", "removed"],
+ "Expected libother to be loaded then unloaded during the debug session.",
+ )
diff --git a/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py b/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py
index 0ed53dac5d86..2d00c512721c 100644
--- a/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py
+++ b/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py
@@ -64,19 +64,18 @@ class TestDAP_module(lldbdap_testcase.DAPTestCaseBase):
self.assertEqual(program, program_module["path"])
self.assertIn("addressRange", program_module)
+ self.continue_to_exit()
+
# Collect all the module names we saw as events.
module_new_names = []
module_changed_names = []
- module_event = self.dap_server.wait_for_event(["module"])
- while module_event is not None:
+ for module_event in self.dap_server.module_events:
reason = module_event["body"]["reason"]
if reason == "new":
module_new_names.append(module_event["body"]["module"]["name"])
elif reason == "changed":
module_changed_names.append(module_event["body"]["module"]["name"])
- module_event = self.dap_server.wait_for_event(["module"])
-
# Make sure we got an event for every active module.
self.assertNotEqual(len(module_new_names), 0)
for module in active_modules:
@@ -86,7 +85,6 @@ class TestDAP_module(lldbdap_testcase.DAPTestCaseBase):
# symbols got added.
self.assertNotEqual(len(module_changed_names), 0)
self.assertIn(program_module["name"], module_changed_names)
- self.continue_to_exit()
@skipIfWindows
def test_modules(self):
diff --git a/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py
index e1ad1425a993..fa62ec243f5c 100644
--- a/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py
+++ b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py
@@ -30,7 +30,11 @@ class TestDAP_restart_console(lldbdap_testcase.DAPTestCaseBase):
if reason == "entry":
seen_stopped_event += 1
- self.assertEqual(seen_stopped_event, 1, "expect only one stopped entry event.")
+ self.assertEqual(
+ seen_stopped_event,
+ 1,
+ f"expect only one stopped entry event in {stopped_events}",
+ )
@skipIfAsan
@skipIfWindows
@@ -92,11 +96,13 @@ class TestDAP_restart_console(lldbdap_testcase.DAPTestCaseBase):
self.build_and_launch(program, console="integratedTerminal", stopOnEntry=True)
[bp_main] = self.set_function_breakpoints(["main"])
- self.dap_server.request_continue() # sends configuration done
- stopped_events = self.dap_server.wait_for_stopped()
+ self.dap_server.request_configurationDone()
+ stopped_threads = list(self.dap_server.thread_stop_reasons.values())
# We should be stopped at the entry point.
- self.assertGreaterEqual(len(stopped_events), 0, "expect stopped events")
- self.verify_stopped_on_entry(stopped_events)
+ self.assertEqual(
+ len(stopped_threads), 1, "Expected the main thread to be stopped on entry."
+ )
+ self.assertEqual(stopped_threads[0]["reason"], "entry")
# Then, if we continue, we should hit the breakpoint at main.
self.dap_server.request_continue()
@@ -105,8 +111,12 @@ class TestDAP_restart_console(lldbdap_testcase.DAPTestCaseBase):
# Restart and check that we still get a stopped event before reaching
# main.
self.dap_server.request_restart()
- stopped_events = self.dap_server.wait_for_stopped()
- self.verify_stopped_on_entry(stopped_events)
+ stopped_threads = list(self.dap_server.thread_stop_reasons.values())
+ # We should be stopped at the entry point.
+ self.assertEqual(
+ len(stopped_threads), 1, "Expected the main thread to be stopped on entry."
+ )
+ self.assertEqual(stopped_threads[0]["reason"], "entry")
# continue to main
self.dap_server.request_continue()
diff --git a/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py b/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py
index a01845669666..018402058917 100644
--- a/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py
+++ b/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py
@@ -32,7 +32,7 @@ class TestDAP_sendEvent(lldbdap_testcase.DAPTestCaseBase):
],
)
self.set_source_breakpoints(source, [breakpoint_line])
- self.continue_to_next_stop()
+ self.do_continue()
custom_event = self.dap_server.wait_for_event(
filter=["my-custom-event-no-body"]
diff --git a/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp b/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp
index 1abce6999874..064ed6d1d3e5 100644
--- a/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp
+++ b/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp
@@ -1651,3 +1651,93 @@ DWARF:
EXPECT_EQ(param_die, ast_parser.GetObjectParameter(sub2, context_die));
}
}
+
+TEST_F(DWARFASTParserClangTests, TestTypeBitSize) {
+ // Tests that we correctly parse DW_AT_bit_size of a DW_AT_base_type.
+
+ const char *yamldata = R"(
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+ Machine: EM_AARCH64
+DWARF:
+ debug_str:
+ - _BitInt(2)
+ debug_abbrev:
+ - ID: 0
+ Table:
+ - Code: 0x1
+ Tag: DW_TAG_compile_unit
+ Children: DW_CHILDREN_yes
+ Attributes:
+ - Attribute: DW_AT_language
+ Form: DW_FORM_data2
+ - Code: 0x2
+ Tag: DW_TAG_base_type
+ Children: DW_CHILDREN_no
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_encoding
+ Form: DW_FORM_data1
+ - Attribute: DW_AT_byte_size
+ Form: DW_FORM_data1
+ - Attribute: DW_AT_bit_size
+ Form: DW_FORM_data1
+
+ debug_info:
+ - Version: 5
+ UnitType: DW_UT_compile
+ AddrSize: 8
+ Entries:
+
+# DW_TAG_compile_unit
+# DW_AT_language [DW_FORM_data2] (DW_LANG_C_plus_plus)
+
+ - AbbrCode: 0x1
+ Values:
+ - Value: 0x04
+
+# DW_TAG_base_type
+# DW_AT_name [DW_FORM_strp] ('_BitInt(2)')
+
+ - AbbrCode: 0x2
+ Values:
+ - Value: 0x0
+ - Value: 0x05
+ - Value: 0x01
+ - Value: 0x02
+...
+)";
+
+ YAMLModuleTester t(yamldata);
+
+ DWARFUnit *unit = t.GetDwarfUnit();
+ ASSERT_NE(unit, nullptr);
+ const DWARFDebugInfoEntry *cu_entry = unit->DIE().GetDIE();
+ ASSERT_EQ(cu_entry->Tag(), DW_TAG_compile_unit);
+ ASSERT_EQ(unit->GetDWARFLanguageType(), DW_LANG_C_plus_plus);
+ DWARFDIE cu_die(unit, cu_entry);
+
+ auto holder = std::make_unique<clang_utils::TypeSystemClangHolder>("ast");
+ auto &ast_ctx = *holder->GetAST();
+ DWARFASTParserClangStub ast_parser(ast_ctx);
+
+ auto type_die = cu_die.GetFirstChild();
+ ASSERT_TRUE(type_die.IsValid());
+ ASSERT_EQ(type_die.Tag(), DW_TAG_base_type);
+
+ ParsedDWARFTypeAttributes attrs(type_die);
+ EXPECT_EQ(attrs.byte_size.value_or(0), 1U);
+ EXPECT_EQ(attrs.data_bit_size.value_or(0), 2U);
+
+ SymbolContext sc;
+ auto type_sp =
+ ast_parser.ParseTypeFromDWARF(sc, type_die, /*type_is_new_ptr=*/nullptr);
+ ASSERT_NE(type_sp, nullptr);
+
+ EXPECT_EQ(llvm::expectedToOptional(type_sp->GetByteSize(nullptr)).value_or(0),
+ 1U);
+}
diff --git a/llvm/docs/GettingInvolved.rst b/llvm/docs/GettingInvolved.rst
index 4b4b09ad87ab..039d61624093 100644
--- a/llvm/docs/GettingInvolved.rst
+++ b/llvm/docs/GettingInvolved.rst
@@ -223,6 +223,10 @@ what to add to your calendar invite.
- `ics <https://calendar.google.com/calendar/ical/c_673c6cd64474c0aff173bf8fa609559f93d654e0984d9d91d71abd32d28c0486%40group.calendar.google.com/public/basic.ics>`__
`gcal <https://calendar.google.com/calendar/embed?src=c_673c6cd64474c0aff173bf8fa609559f93d654e0984d9d91d71abd32d28c0486%40group.calendar.google.com&ctz=America%2FLos_Angeles>`__
-
+ * - GlobalISel
+ - Every 2nd Tuesday of the month
+ - `gcal <https://calendar.google.com/calendar/u/0?cid=ZDcyMjc0ZjZiZjNhMzFlYmE3NTNkMWM2MGM2NjM5ZWU3ZDE2MjM4MGFlZDc2ZjViY2UyYzMwNzVhZjk4MzQ4ZEBncm91cC5jYWxlbmRhci5nb29nbGUuY29t>`__
+ - `Meeting details/agenda <https://docs.google.com/document/d/1Ry8O4-Tm5BFj9AMjr8qTQFU80z-ptiNQ62687NaIvLs/edit?usp=sharing>`__
For event owners, our Discord bot also supports sending automated announcements
@@ -254,10 +258,6 @@ the future.
- `ics <https://calendar.google.com/calendar/ical/c_1mincouiltpa24ac14of14lhi4%40group.calendar.google.com/public/basic.ics>`__
`gcal <https://calendar.google.com/calendar/embed?src=c_1mincouiltpa24ac14of14lhi4%40group.calendar.google.com>`__
- `Minutes/docs <https://docs.google.com/document/d/1-uEEZfmRdPThZlctOq9eXlmUaSSAAi8oKxhrPY_lpjk/edit#>`__
- * - GlobalISel
- - Every 2nd Tuesday of the month
- - `gcal <https://calendar.google.com/calendar/u/0?cid=ZDcyMjc0ZjZiZjNhMzFlYmE3NTNkMWM2MGM2NjM5ZWU3ZDE2MjM4MGFlZDc2ZjViY2UyYzMwNzVhZjk4MzQ4ZEBncm91cC5jYWxlbmRhci5nb29nbGUuY29t>`__
- - `Meeting details/agenda <https://docs.google.com/document/d/1Ry8O4-Tm5BFj9AMjr8qTQFU80z-ptiNQ62687NaIvLs/edit?usp=sharing>`__
* - Vector Predication
- Every 2 weeks on Tuesdays, 3pm UTC
-
diff --git a/llvm/include/llvm/ADT/TypeSwitch.h b/llvm/include/llvm/ADT/TypeSwitch.h
index 5657303b0a1f..50ca1d5a6b5b 100644
--- a/llvm/include/llvm/ADT/TypeSwitch.h
+++ b/llvm/include/llvm/ADT/TypeSwitch.h
@@ -111,6 +111,7 @@ public:
return std::move(*result);
return defaultFn(this->value);
}
+
/// As a default, return the given value.
[[nodiscard]] ResultT Default(ResultT defaultResult) {
if (result)
@@ -118,6 +119,22 @@ public:
return defaultResult;
}
+ /// Default for pointer-like results types that accept `nullptr`.
+ template <typename ArgT = ResultT,
+ typename =
+ std::enable_if_t<std::is_constructible_v<ArgT, std::nullptr_t>>>
+ [[nodiscard]] ResultT Default(std::nullptr_t) {
+ return Default(ResultT(nullptr));
+ }
+
+ /// Default for optional results types that accept `std::nullopt`.
+ template <typename ArgT = ResultT,
+ typename =
+ std::enable_if_t<std::is_constructible_v<ArgT, std::nullopt_t>>>
+ [[nodiscard]] ResultT Default(std::nullopt_t) {
+ return Default(ResultT(std::nullopt));
+ }
+
/// Declare default as unreachable, making sure that all cases were handled.
[[nodiscard]] ResultT DefaultUnreachable(
const char *message = "Fell off the end of a type-switch") {
diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h
index ced446dacb6c..9dcd4b53a0db 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h
@@ -26,8 +26,6 @@
namespace llvm {
-LLVM_ABI extern cl::opt<bool> DebugInfoCorrelate;
-
class Function;
class Instruction;
class Module;
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 84ee8c0bf3e1..11d829492a10 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -2854,14 +2854,18 @@ bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst,
banerjeeMIVtest(Src, Dst, Loops, Result);
}
-// Given a product, e.g., 10*X*Y, returns the first constant operand,
-// in this case 10. If there is no constant part, returns std::nullopt.
-static std::optional<APInt> getConstantPart(const SCEV *Expr) {
+/// Given a SCEVMulExpr, returns its first operand if its first operand is a
+/// constant and the product doesn't overflow in a signed sense. Otherwise,
+/// returns std::nullopt. For example, given (10 * X * Y)<nsw>, it returns 10.
+/// Notably, if it doesn't have nsw, the multiplication may overflow, and if
+/// so, it may not a multiple of 10.
+static std::optional<APInt> getConstanCoefficient(const SCEV *Expr) {
if (const auto *Constant = dyn_cast<SCEVConstant>(Expr))
return Constant->getAPInt();
if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr))
if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0)))
- return Constant->getAPInt();
+ if (Product->hasNoSignedWrap())
+ return Constant->getAPInt();
return std::nullopt;
}
@@ -2887,7 +2891,7 @@ bool DependenceInfo::accumulateCoefficientsGCD(const SCEV *Expr,
if (AddRec->getLoop() == CurLoop) {
CurLoopCoeff = Step;
} else {
- std::optional<APInt> ConstCoeff = getConstantPart(Step);
+ std::optional<APInt> ConstCoeff = getConstanCoefficient(Step);
// If the coefficient is the product of a constant and other stuff, we can
// use the constant in the GCD computation.
@@ -2940,7 +2944,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
// If the coefficient is the product of a constant and other stuff,
// we can use the constant in the GCD computation.
- std::optional<APInt> ConstCoeff = getConstantPart(Coeff);
+ std::optional<APInt> ConstCoeff = getConstanCoefficient(Coeff);
if (!ConstCoeff)
return false;
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff->abs());
@@ -2958,7 +2962,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
// If the coefficient is the product of a constant and other stuff,
// we can use the constant in the GCD computation.
- std::optional<APInt> ConstCoeff = getConstantPart(Coeff);
+ std::optional<APInt> ConstCoeff = getConstanCoefficient(Coeff);
if (!ConstCoeff)
return false;
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff->abs());
@@ -2979,7 +2983,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
} else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) {
// Search for constant operand to participate in GCD;
// If none found; return false.
- std::optional<APInt> ConstOp = getConstantPart(Product);
+ std::optional<APInt> ConstOp = getConstanCoefficient(Product);
if (!ConstOp)
return false;
ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, ConstOp->abs());
@@ -3032,7 +3036,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff);
// If the coefficient is the product of a constant and other stuff,
// we can use the constant in the GCD computation.
- std::optional<APInt> ConstCoeff = getConstantPart(Delta);
+ std::optional<APInt> ConstCoeff = getConstanCoefficient(Delta);
if (!ConstCoeff)
// The difference of the two coefficients might not be a product
// or constant, in which case we give up on this direction.
diff --git a/llvm/lib/Frontend/Driver/CodeGenOptions.cpp b/llvm/lib/Frontend/Driver/CodeGenOptions.cpp
index df884908845d..b546e816419e 100644
--- a/llvm/lib/Frontend/Driver/CodeGenOptions.cpp
+++ b/llvm/lib/Frontend/Driver/CodeGenOptions.cpp
@@ -12,7 +12,6 @@
#include "llvm/TargetParser/Triple.h"
namespace llvm {
-extern llvm::cl::opt<bool> DebugInfoCorrelate;
extern llvm::cl::opt<llvm::InstrProfCorrelator::ProfCorrelatorKind>
ProfileCorrelate;
} // namespace llvm
@@ -64,8 +63,7 @@ TargetLibraryInfoImpl *createTLII(const llvm::Triple &TargetTriple,
}
std::string getDefaultProfileGenName() {
- return llvm::DebugInfoCorrelate ||
- llvm::ProfileCorrelate != InstrProfCorrelator::NONE
+ return llvm::ProfileCorrelate != InstrProfCorrelator::NONE
? "default_%m.proflite"
: "default_%m.profraw";
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 9ce12243016f..aed325cf627b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -221,12 +221,22 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const {
const DebugLoc &DL = I.getDebugLoc();
MachineBasicBlock *BB = I.getParent();
+ Register VCCReg = I.getOperand(1).getReg();
+ MachineInstr *Cmp;
+
+ if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ unsigned CmpOpc =
+ STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
+ Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc)).addReg(VCCReg).addImm(0);
+ } else {
+ // For gfx7 and earlier, S_CMP_LG_U64 doesn't exist, so we use S_OR_B64
+ // which sets SCC as a side effect.
+ Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst)
+ .addReg(VCCReg)
+ .addReg(VCCReg);
+ }
- unsigned CmpOpc =
- STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
- MachineInstr *Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc))
- .addReg(I.getOperand(1).getReg())
- .addImm(0);
if (!constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI))
return false;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 540756653dd2..b84c30ecaac0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -500,6 +500,16 @@ void RegBankLegalizeHelper::lowerUnpackMinMax(MachineInstr &MI) {
MI.eraseFromParent();
}
+void RegBankLegalizeHelper::lowerUnpackAExt(MachineInstr &MI) {
+ auto [Op1Lo, Op1Hi] = unpackAExt(MI.getOperand(1).getReg());
+ auto [Op2Lo, Op2Hi] = unpackAExt(MI.getOperand(2).getReg());
+ auto ResLo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Op1Lo, Op2Lo});
+ auto ResHi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Op1Hi, Op2Hi});
+ B.buildBuildVectorTrunc(MI.getOperand(0).getReg(),
+ {ResLo.getReg(0), ResHi.getReg(0)});
+ MI.eraseFromParent();
+}
+
static bool isSignedBFE(MachineInstr &MI) {
if (GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI))
return (GI->is(Intrinsic::amdgcn_sbfe));
@@ -804,6 +814,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
}
break;
}
+ case UnpackAExt:
+ return lowerUnpackAExt(MI);
case WidenMMOToS32:
return widenMMOToS32(cast<GAnyLoad>(MI));
}
@@ -1120,7 +1132,8 @@ void RegBankLegalizeHelper::applyMappingDst(
assert(RB == SgprRB);
Register NewDst = MRI.createVirtualRegister(SgprRB_S32);
Op.setReg(NewDst);
- B.buildTrunc(Reg, NewDst);
+ if (!MRI.use_empty(Reg))
+ B.buildTrunc(Reg, NewDst);
break;
}
case InvalidMapping: {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
index d937815bf471..ad3ff1d374ec 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
@@ -124,6 +124,7 @@ private:
void lowerSplitTo32Select(MachineInstr &MI);
void lowerSplitTo32SExtInReg(MachineInstr &MI);
void lowerUnpackMinMax(MachineInstr &MI);
+ void lowerUnpackAExt(MachineInstr &MI);
};
} // end namespace AMDGPU
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index a67b12a22589..01abd358ff59 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -470,7 +470,19 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
- .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
+ .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
+ .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackAExt})
+ .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
+ .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}})
+ .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}});
+
+ addRulesForGOpcs({G_UADDO, G_USUBO}, Standard)
+ .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}})
+ .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});
+
+ addRulesForGOpcs({G_UADDE, G_USUBE}, Standard)
+ .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32, Sgpr32AExtBoolInReg}})
+ .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}});
addRulesForGOpcs({G_MUL}, Standard).Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index 93e0efda77fd..030bd75f8cd1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -223,7 +223,8 @@ enum LoweringMethodID {
UniCstExt,
SplitLoad,
WidenLoad,
- WidenMMOToS32
+ WidenMMOToS32,
+ UnpackAExt
};
enum FastRulesTypes {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b34ab2a7e08e..8bb28084159e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7035,9 +7035,15 @@ static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N,
SDLoc SL(N);
if (Src.getOpcode() == ISD::SETCC) {
+ SDValue Op0 = Src.getOperand(0);
+ SDValue Op1 = Src.getOperand(1);
+ // Need to expand bfloat to float for comparison (setcc).
+ if (Op0.getValueType() == MVT::bf16) {
+ Op0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Op0);
+ Op1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Op1);
+ }
// (ballot (ISD::SETCC ...)) -> (AMDGPUISD::SETCC ...)
- return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src.getOperand(0),
- Src.getOperand(1), Src.getOperand(2));
+ return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Op0, Op1, Src.getOperand(2));
}
if (const ConstantSDNode *Arg = dyn_cast<ConstantSDNode>(Src)) {
// (ballot 0) -> 0
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index a4d3d62e9f48..6b0653457cba 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -22109,6 +22109,11 @@ bool ARMTargetLowering::isComplexDeinterleavingOperationSupported(
ScalarTy->isIntegerTy(32));
}
+ArrayRef<MCPhysReg> ARMTargetLowering::getRoundingControlRegisters() const {
+ static const MCPhysReg RCRegs[] = {ARM::FPSCR_RM};
+ return RCRegs;
+}
+
Value *ARMTargetLowering::createComplexDeinterleavingIR(
IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 357d2c5d2fad..bf3438b0d880 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -1009,6 +1009,8 @@ class VectorType;
bool isUnsupportedFloatingType(EVT VT) const;
+ ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
+
SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
SDValue ARMcc, SDValue Flags, SelectionDAG &DAG) const;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 9a6afa1cd4ea..b25a05400fe3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3995,6 +3995,7 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
case RISCV::CTZW:
case RISCV::CPOPW:
case RISCV::SLLI_UW:
+ case RISCV::ABSW:
case RISCV::FMV_W_X:
case RISCV::FCVT_H_W:
case RISCV::FCVT_H_W_INX:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1c930acd9c4a..56881f71934c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -433,6 +433,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasStdExtP() ||
(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
setOperationAction(ISD::ABS, XLenVT, Legal);
+ if (Subtarget.is64Bit())
+ setOperationAction(ISD::ABS, MVT::i32, Custom);
} else if (Subtarget.hasShortForwardBranchOpt()) {
// We can use PseudoCCSUB to implement ABS.
setOperationAction(ISD::ABS, XLenVT, Legal);
@@ -14816,8 +14818,16 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
+ if (Subtarget.hasStdExtP()) {
+ SDValue Src =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+ SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
+ return;
+ }
+
if (Subtarget.hasStdExtZbb()) {
- // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
+ // Emit a special node that will be expanded to NEGW+MAX at isel.
// This allows us to remember that the result is sign extended. Expanding
// to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
@@ -20290,6 +20300,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
break;
}
+ case RISCVISD::ABSW:
case RISCVISD::CLZW:
case RISCVISD::CTZW: {
// Only the lower 32 bits of the first operand are read
@@ -21862,6 +21873,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
case RISCVISD::REMUW:
case RISCVISD::ROLW:
case RISCVISD::RORW:
+ case RISCVISD::ABSW:
case RISCVISD::FCVT_W_RV64:
case RISCVISD::FCVT_WU_RV64:
case RISCVISD::STRICT_FCVT_W_RV64:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index cc085bb6c9fd..4cbbba3aa68c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1461,5 +1461,10 @@ let Predicates = [HasStdExtP, IsRV32] in {
// Codegen patterns
//===----------------------------------------------------------------------===//
+def riscv_absw : RVSDNode<"ABSW", SDTIntUnaryOp>;
+
let Predicates = [HasStdExtP] in
def : PatGpr<abs, ABS>;
+
+let Predicates = [HasStdExtP, IsRV64] in
+def : PatGpr<riscv_absw, ABSW>;
diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
index d08115b72977..ea98cdb4a1e6 100644
--- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
+++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
@@ -172,6 +172,7 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI,
case RISCV::CTZW:
case RISCV::CPOPW:
case RISCV::SLLI_UW:
+ case RISCV::ABSW:
case RISCV::FMV_W_X:
case RISCV::FCVT_H_W:
case RISCV::FCVT_H_W_INX:
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 7795cce9d9d3..b5548d4f24a2 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -69,14 +69,6 @@ namespace llvm {
// Command line option to enable vtable value profiling. Defined in
// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
extern cl::opt<bool> EnableVTableValueProfiling;
-// TODO: Remove -debug-info-correlate in next LLVM release, in favor of
-// -profile-correlate=debug-info.
-cl::opt<bool> DebugInfoCorrelate(
- "debug-info-correlate",
- cl::desc("Use debug info to correlate profiles. (Deprecated, use "
- "-profile-correlate=debug-info)"),
- cl::init(false));
-
LLVM_ABI cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate(
"profile-correlate",
cl::desc("Use debug info or binary file to correlate profiles."),
@@ -1047,7 +1039,7 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
// in lightweight mode. We need to move the value profile pointer to the
// Counter struct to get this working.
assert(
- !DebugInfoCorrelate && ProfileCorrelate == InstrProfCorrelator::NONE &&
+ ProfileCorrelate == InstrProfCorrelator::NONE &&
"Value profiling is not yet supported with lightweight instrumentation");
GlobalVariable *Name = Ind->getName();
auto It = ProfileDataMap.find(Name);
@@ -1504,7 +1496,7 @@ static inline Constant *getVTableAddrForProfData(GlobalVariable *GV) {
}
void InstrLowerer::getOrCreateVTableProfData(GlobalVariable *GV) {
- assert(!DebugInfoCorrelate &&
+ assert(ProfileCorrelate != InstrProfCorrelator::DEBUG_INFO &&
"Value profiling is not supported with lightweight instrumentation");
if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
return;
@@ -1584,8 +1576,7 @@ GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc,
// Use internal rather than private linkage so the counter variable shows up
// in the symbol table when using debug info for correlation.
- if ((DebugInfoCorrelate ||
- ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) &&
+ if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO &&
TT.isOSBinFormatMachO() && Linkage == GlobalValue::PrivateLinkage)
Linkage = GlobalValue::InternalLinkage;
@@ -1691,8 +1682,7 @@ InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) {
auto *CounterPtr = setupProfileSection(Inc, IPSK_cnts);
PD.RegionCounters = CounterPtr;
- if (DebugInfoCorrelate ||
- ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) {
+ if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) {
LLVMContext &Ctx = M.getContext();
Function *Fn = Inc->getParent()->getParent();
if (auto *SP = Fn->getSubprogram()) {
@@ -1737,7 +1727,7 @@ InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) {
void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
// When debug information is correlated to profile data, a data variable
// is not needed.
- if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
+ if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
return;
GlobalVariable *NamePtr = Inc->getName();
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 71736cfa4d89..af53fa0bae46 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -456,7 +456,7 @@ createIRLevelProfileFlagVar(Module &M,
ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
if (PGOInstrumentLoopEntries)
ProfileVersion |= VARIANT_MASK_INSTR_LOOP_ENTRIES;
- if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
+ if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
if (PGOFunctionEntryCoverage)
ProfileVersion |=
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 5af6c96c56a0..bb6c879f4d47 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -81,6 +81,7 @@ STATISTIC(
STATISTIC(NumInvariantConditionsInjected,
"Number of invariant conditions injected and unswitched");
+namespace llvm {
static cl::opt<bool> EnableNonTrivialUnswitch(
"enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
cl::desc("Forcibly enables non-trivial loop unswitching rather than "
@@ -131,11 +132,17 @@ static cl::opt<bool> InjectInvariantConditions(
static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold(
"simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
- cl::Hidden, cl::desc("Only try to inject loop invariant conditions and "
- "unswitch on them to eliminate branches that are "
- "not-taken 1/<this option> times or less."),
+ cl::Hidden,
+ cl::desc("Only try to inject loop invariant conditions and "
+ "unswitch on them to eliminate branches that are "
+ "not-taken 1/<this option> times or less."),
cl::init(16));
+static cl::opt<bool> EstimateProfile("simple-loop-unswitch-estimate-profile",
+ cl::Hidden, cl::init(true));
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // namespace llvm
+
AnalysisKey ShouldRunExtraSimpleLoopUnswitch::Key;
namespace {
struct CompareDesc {
@@ -268,13 +275,42 @@ static bool areLoopExitPHIsLoopInvariant(const Loop &L,
llvm_unreachable("Basic blocks should never be empty!");
}
-/// Copy a set of loop invariant values \p ToDuplicate and insert them at the
+/// Copy a set of loop invariant values \p Invariants and insert them at the
/// end of \p BB and conditionally branch on the copied condition. We only
/// branch on a single value.
+/// We attempt to estimate the profile of the resulting conditional branch from
+/// \p ComputeProfFrom, which is the original conditional branch we're
+/// unswitching.
+/// When \p Direction is true, the \p Invariants form a disjunction, and the
+/// branch conditioned on it exits the loop on the "true" case. When \p
+/// Direction is false, the \p Invariants form a conjunction and the branch
+/// exits on the "false" case.
static void buildPartialUnswitchConditionalBranch(
BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
- const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) {
+ const Instruction *I, AssumptionCache *AC, const DominatorTree &DT,
+ const BranchInst &ComputeProfFrom) {
+
+ SmallVector<uint32_t> BranchWeights;
+ bool HasBranchWeights = EstimateProfile && !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(ComputeProfFrom, BranchWeights);
+ // If Direction is true, that means we had a disjunction and that the "true"
+ // case exits. The probability of the disjunction of the subset of terms is at
+ // most as high as the original one. So, if the probability is higher than the
+ // one we'd assign in absence of a profile (i.e. 0.5), we will use 0.5,
+ // but if it's lower, we will use the original probability.
+ // Conversely, if Direction is false, that means we had a conjunction, and the
+ // probability of exiting is captured in the second branch weight. That
+ // probability is a disjunction (of the negation of the original terms). The
+ // same reasoning applies as above.
+ // Issue #165649: should we expect BFI to conserve, and use that to calculate
+ // the branch weights?
+ if (HasBranchWeights &&
+ static_cast<double>(BranchWeights[Direction ? 0 : 1]) /
+ static_cast<double>(sum_of(BranchWeights)) >
+ 0.5)
+ HasBranchWeights = false;
+
IRBuilder<> IRB(&BB);
IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated());
@@ -287,8 +323,14 @@ static void buildPartialUnswitchConditionalBranch(
Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
: IRB.CreateAnd(FrozenInvariants);
- IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
- Direction ? &NormalSucc : &UnswitchedSucc);
+ auto *BR = IRB.CreateCondBr(
+ Cond, Direction ? &UnswitchedSucc : &NormalSucc,
+ Direction ? &NormalSucc : &UnswitchedSucc,
+ HasBranchWeights ? ComputeProfFrom.getMetadata(LLVMContext::MD_prof)
+ : nullptr);
+ if (!HasBranchWeights)
+ setExplicitlyUnknownBranchWeightsIfProfiled(
+ *BR, *BR->getParent()->getParent(), DEBUG_TYPE);
}
/// Copy a set of loop invariant values, and conditionally branch on them.
@@ -658,7 +700,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
" condition!");
buildPartialUnswitchConditionalBranch(
*OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
- FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
+ FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT, BI);
}
// Update the dominator tree with the added edge.
@@ -2477,7 +2519,7 @@ static void unswitchNontrivialInvariants(
else {
buildPartialUnswitchConditionalBranch(
*SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
- FreezeLoopUnswitchCond, BI, &AC, DT);
+ FreezeLoopUnswitchCond, BI, &AC, DT, *BI);
}
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8ebc10808027..505fb435e91e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8340,11 +8340,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
&R) ||
(isa<VPInstruction>(&R) && !UnderlyingValue))
continue;
-
- // FIXME: VPlan0, which models a copy of the original scalar loop, should
- // not use VPWidenPHIRecipe to model the phis.
- assert((isa<VPWidenPHIRecipe>(&R) || isa<VPInstruction>(&R)) &&
- UnderlyingValue && "unsupported recipe");
+ assert(isa<VPInstruction>(&R) && UnderlyingValue && "unsupported recipe");
// TODO: Gradually replace uses of underlying instruction by analyses on
// VPlan.
diff --git a/llvm/test/Analysis/DependenceAnalysis/GCD.ll b/llvm/test/Analysis/DependenceAnalysis/GCD.ll
index 03343e7a9821..cb14d189afe4 100644
--- a/llvm/test/Analysis/DependenceAnalysis/GCD.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/GCD.ll
@@ -254,7 +254,7 @@ define void @gcd4(ptr %A, ptr %B, i64 %M, i64 %N) nounwind uwtable ssp {
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - output [* *]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx16, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - flow [* *|<]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx16, align 4 --> Dst: %0 = load i32, ptr %arrayidx16, align 4
@@ -322,7 +322,7 @@ define void @gcd5(ptr %A, ptr %B, i64 %M, i64 %N) nounwind uwtable ssp {
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - output [* *]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx16, align 4
-; CHECK-NEXT: da analyze - flow [<> *]!
+; CHECK-NEXT: da analyze - flow [* *|<]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx16, align 4 --> Dst: %0 = load i32, ptr %arrayidx16, align 4
@@ -390,7 +390,7 @@ define void @gcd6(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp {
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store i32 %conv, ptr %arrayidx5, align 4
; CHECK-NEXT: da analyze - output [* *]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: %2 = load i32, ptr %arrayidx9, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - flow [* *|<]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store i32 %2, ptr %B.addr.12, align 4
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: %2 = load i32, ptr %arrayidx9, align 4 --> Dst: %2 = load i32, ptr %arrayidx9, align 4
diff --git a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
index cdfaec76fa89..73a415baef4c 100644
--- a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
@@ -384,7 +384,7 @@ define void @symbolicsiv6(ptr %A, ptr %B, i64 %n, i64 %N, i64 %M) nounwind uwtab
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx7, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - flow [*|<]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx7, align 4 --> Dst: %0 = load i32, ptr %arrayidx7, align 4
@@ -440,7 +440,7 @@ define void @symbolicsiv7(ptr %A, ptr %B, i64 %n, i64 %N, i64 %M) nounwind uwtab
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %1 = load i32, ptr %arrayidx6, align 4
-; CHECK-NEXT: da analyze - flow [<>]!
+; CHECK-NEXT: da analyze - flow [*|<]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %1, ptr %B.addr.02, align 4
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: %1 = load i32, ptr %arrayidx6, align 4 --> Dst: %1 = load i32, ptr %arrayidx6, align 4
diff --git a/llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll b/llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll
index 64fad37ab699..783150af2cd1 100644
--- a/llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll
@@ -18,7 +18,7 @@ define void @unknown_sign(ptr %a, i64 %k) {
; CHECK-NEXT: Src: store i8 1, ptr %idx.0, align 1 --> Dst: store i8 1, ptr %idx.0, align 1
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: store i8 1, ptr %idx.0, align 1 --> Dst: store i8 2, ptr %idx.1, align 1
-; CHECK-NEXT: da analyze - output [<>]!
+; CHECK-NEXT: da analyze - output [*|<]!
; CHECK-NEXT: Src: store i8 2, ptr %idx.1, align 1 --> Dst: store i8 2, ptr %idx.1, align 1
; CHECK-NEXT: da analyze - none!
;
diff --git a/llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll b/llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll
index 43f66dd7d097..9169ac323d83 100644
--- a/llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll
@@ -13,23 +13,20 @@
; offset1 += 3;
; }
;
-; FIXME: DependenceAnalysis currently detects no dependency between the two
-; stores, but it does exist. E.g., consider `m` is 12297829382473034411, which
-; is a modular multiplicative inverse of 3 under modulo 2^64. Then `offset0` is
-; effectively `i + 4`, so accesses will be as follows:
+; Dependency exists between the two stores. E.g., consider `m` is
+; 12297829382473034411, which is a modular multiplicative inverse of 3 under
+; modulo 2^64. Then `offset0` is effectively `i + 4`, so accesses will be as
+; follows:
;
; - A[offset0] : A[4], A[5], A[6], ...
; - A[offset1] : A[0], A[3], A[6], ...
;
-; The root cause is that DA interprets `3*m` in non-modular arithmetic, which
-; isn't necessarily true due to overflow.
-;
define void @gcdmiv_coef_ovfl(ptr %A, i64 %m) {
; CHECK-ALL-LABEL: 'gcdmiv_coef_ovfl'
; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
; CHECK-ALL-NEXT: da analyze - none!
; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
-; CHECK-ALL-NEXT: da analyze - none!
+; CHECK-ALL-NEXT: da analyze - output [*|<]!
; CHECK-ALL-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
; CHECK-ALL-NEXT: da analyze - none!
;
@@ -37,7 +34,7 @@ define void @gcdmiv_coef_ovfl(ptr %A, i64 %m) {
; CHECK-GCD-MIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1
; CHECK-GCD-MIV-NEXT: da analyze - consistent output [*]!
; CHECK-GCD-MIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
-; CHECK-GCD-MIV-NEXT: da analyze - none!
+; CHECK-GCD-MIV-NEXT: da analyze - consistent output [*|<]!
; CHECK-GCD-MIV-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1
; CHECK-GCD-MIV-NEXT: da analyze - consistent output [*]!
;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll
new file mode 100644
index 000000000000..e11720011af1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll
@@ -0,0 +1,612 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX12 %s
+
+define i16 @s_add_i16(i16 inreg %a, i16 inreg %b) {
+; GFX7-LABEL: s_add_i16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_add_i32 s16, s16, s17
+; GFX7-NEXT: v_mov_b32_e32 v0, s16
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_add_i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_add_i32 s16, s16, s17
+; GFX9-NEXT: v_mov_b32_e32 v0, s16
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_add_i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_add_i32 s16, s16, s17
+; GFX8-NEXT: v_mov_b32_e32 v0, s16
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_add_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_add_i32 s16, s16, s17
+; GFX10-NEXT: v_mov_b32_e32 v0, s16
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_add_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_add_i32 s0, s0, s1
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_add_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_add_co_i32 s0, s0, s1
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add i16 %a, %b
+ ret i16 %c
+}
+
+define i16 @v_add_i16(i16 %a, i16 %b) {
+; GFX7-LABEL: v_add_i16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_add_i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_u16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_add_i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_add_u16_e32 v0, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_add_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_nc_u16 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_add_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_nc_u16 v0.l, v0.l, v1.l
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_add_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_add_nc_u16 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add i16 %a, %b
+ ret i16 %c
+}
+
+define i32 @s_add_i32(i32 inreg %a, i32 inreg %b) {
+; GFX7-LABEL: s_add_i32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_add_i32 s16, s16, s17
+; GFX7-NEXT: v_mov_b32_e32 v0, s16
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_add_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_add_i32 s16, s16, s17
+; GFX9-NEXT: v_mov_b32_e32 v0, s16
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_add_i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_add_i32 s16, s16, s17
+; GFX8-NEXT: v_mov_b32_e32 v0, s16
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_add_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_add_i32 s16, s16, s17
+; GFX10-NEXT: v_mov_b32_e32 v0, s16
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_add_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_add_i32 s0, s0, s1
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_add_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_add_co_i32 s0, s0, s1
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @v_add_i32(i32 %a, i32 %b) {
+; GFX7-LABEL: v_add_i32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_add_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_add_i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_add_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_add_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_add_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+
+define <2 x i16> @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) {
+; GFX7-LABEL: s_add_v2i16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_add_i32 s16, s16, s18
+; GFX7-NEXT: s_add_i32 s17, s17, s19
+; GFX7-NEXT: v_mov_b32_e32 v0, s16
+; GFX7-NEXT: v_mov_b32_e32 v1, s17
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_add_v2i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_lshr_b32 s4, s16, 16
+; GFX9-NEXT: s_lshr_b32 s5, s17, 16
+; GFX9-NEXT: s_add_i32 s16, s16, s17
+; GFX9-NEXT: s_add_i32 s4, s4, s5
+; GFX9-NEXT: s_pack_ll_b32_b16 s4, s16, s4
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_add_v2i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_lshr_b32 s4, s16, 16
+; GFX8-NEXT: s_lshr_b32 s5, s17, 16
+; GFX8-NEXT: s_add_i32 s4, s4, s5
+; GFX8-NEXT: s_add_i32 s16, s16, s17
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
+; GFX8-NEXT: s_and_b32 s5, 0xffff, s16
+; GFX8-NEXT: s_lshl_b32 s4, s4, 16
+; GFX8-NEXT: s_or_b32 s4, s5, s4
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_add_v2i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_lshr_b32 s4, s16, 16
+; GFX10-NEXT: s_lshr_b32 s5, s17, 16
+; GFX10-NEXT: s_add_i32 s16, s16, s17
+; GFX10-NEXT: s_add_i32 s4, s4, s5
+; GFX10-NEXT: s_pack_ll_b32_b16 s4, s16, s4
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_add_v2i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_lshr_b32 s2, s0, 16
+; GFX11-NEXT: s_lshr_b32 s3, s1, 16
+; GFX11-NEXT: s_add_i32 s0, s0, s1
+; GFX11-NEXT: s_add_i32 s2, s2, s3
+; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s2
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_add_v2i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_lshr_b32 s2, s0, 16
+; GFX12-NEXT: s_lshr_b32 s3, s1, 16
+; GFX12-NEXT: s_add_co_i32 s0, s0, s1
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_add_co_i32 s2, s2, s3
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_pack_ll_b32_b16 s0, s0, s2
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add <2 x i16> %a, %b
+ ret <2 x i16> %c
+}
+
+define <2 x i16> @v_add_v2i16(<2 x i16> %a, <2 x i16> %b) {
+; GFX7-LABEL: v_add_v2i16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_add_v2i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_add_v2i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_add_u16_e32 v2, v0, v1
+; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_add_v2i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_add_v2i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_add_v2i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_pk_add_u16 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add <2 x i16> %a, %b
+ ret <2 x i16> %c
+}
+
+define i64 @s_add_i64(i64 inreg %a, i64 inreg %b) {
+; GFX7-LABEL: s_add_i64:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_add_u32 s4, s16, s18
+; GFX7-NEXT: s_addc_u32 s5, s17, s19
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: v_mov_b32_e32 v1, s5
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_add_i64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_add_u32 s4, s16, s18
+; GFX9-NEXT: s_addc_u32 s5, s17, s19
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_add_i64:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_add_u32 s4, s16, s18
+; GFX8-NEXT: s_addc_u32 s5, s17, s19
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: v_mov_b32_e32 v1, s5
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_add_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_add_u32 s4, s16, s18
+; GFX10-NEXT: s_addc_u32 s5, s17, s19
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_add_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_add_u32 s0, s0, s2
+; GFX11-NEXT: s_addc_u32 s1, s1, s3
+; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_add_i64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add i64 %a, %b
+ ret i64 %c
+}
+
+define i64 @v_add_i64(i64 %a, i64 %b) {
+; GFX7-LABEL: v_add_i64:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_add_i64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_add_i64:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_add_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_add_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_add_i64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-NEXT: s_wait_alu 0xfffd
+; GFX12-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add i64 %a, %b
+ ret i64 %c
+}
+
+define void @s_uaddo_uadde(i64 inreg %a, i64 inreg %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) {
+; GFX7-LABEL: s_uaddo_uadde:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_add_u32 s4, s16, s18
+; GFX7-NEXT: s_addc_u32 s5, s17, s19
+; GFX7-NEXT: v_mov_b32_e32 v4, s4
+; GFX7-NEXT: s_mov_b32 s6, 0
+; GFX7-NEXT: s_cselect_b32 s8, 1, 0
+; GFX7-NEXT: v_mov_b32_e32 v5, s5
+; GFX7-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64
+; GFX7-NEXT: v_mov_b32_e32 v0, s8
+; GFX7-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_uaddo_uadde:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_add_u32 s4, s16, s18
+; GFX9-NEXT: s_addc_u32 s5, s17, s19
+; GFX9-NEXT: v_mov_b32_e32 v4, s4
+; GFX9-NEXT: s_cselect_b32 s6, 1, 0
+; GFX9-NEXT: v_mov_b32_e32 v5, s5
+; GFX9-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX9-NEXT: v_mov_b32_e32 v0, s6
+; GFX9-NEXT: global_store_dword v[2:3], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_uaddo_uadde:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_add_u32 s4, s16, s18
+; GFX8-NEXT: s_addc_u32 s5, s17, s19
+; GFX8-NEXT: v_mov_b32_e32 v4, s4
+; GFX8-NEXT: s_cselect_b32 s6, 1, 0
+; GFX8-NEXT: v_mov_b32_e32 v5, s5
+; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
+; GFX8-NEXT: v_mov_b32_e32 v0, s6
+; GFX8-NEXT: flat_store_dword v[2:3], v0
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_uaddo_uadde:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_add_u32 s4, s16, s18
+; GFX10-NEXT: s_addc_u32 s5, s17, s19
+; GFX10-NEXT: s_cselect_b32 s6, 1, 0
+; GFX10-NEXT: v_mov_b32_e32 v4, s4
+; GFX10-NEXT: v_mov_b32_e32 v5, s5
+; GFX10-NEXT: v_mov_b32_e32 v6, s6
+; GFX10-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX10-NEXT: global_store_dword v[2:3], v6, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_uaddo_uadde:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_add_u32 s0, s0, s2
+; GFX11-NEXT: s_addc_u32 s1, s1, s3
+; GFX11-NEXT: s_cselect_b32 s2, 1, 0
+; GFX11-NEXT: v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0
+; GFX11-NEXT: v_mov_b32_e32 v6, s2
+; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off
+; GFX11-NEXT: global_store_b32 v[2:3], v6, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_uaddo_uadde:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_add_co_u32 s0, s0, s2
+; GFX12-NEXT: s_add_co_ci_u32 s1, s1, s3
+; GFX12-NEXT: s_cselect_b32 s2, 1, 0
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0
+; GFX12-NEXT: v_mov_b32_e32 v6, s2
+; GFX12-NEXT: global_store_b64 v[0:1], v[4:5], off
+; GFX12-NEXT: global_store_b32 v[2:3], v6, off
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %uaddo = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+ %add = extractvalue {i64, i1} %uaddo, 0
+ %of = extractvalue {i64, i1} %uaddo, 1
+ %of32 = select i1 %of, i32 1, i32 0
+ store i64 %add, ptr addrspace(1) %res
+ store i32 %of32, ptr addrspace(1) %carry
+ ret void
+}
+
+define void @v_uaddo_uadde(i64 %a, i64 %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) {
+; GFX7-LABEL: v_uaddo_uadde:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX7-NEXT: s_mov_b32 s6, 0
+; GFX7-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX7-NEXT: buffer_store_dwordx2 v[0:1], v[4:5], s[4:7], 0 addr64
+; GFX7-NEXT: buffer_store_dword v2, v[6:7], s[4:7], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_uaddo_uadde:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: global_store_dwordx2 v[4:5], v[0:1], off
+; GFX9-NEXT: global_store_dword v[6:7], v2, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_uaddo_uadde:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
+; GFX8-NEXT: flat_store_dword v[6:7], v2
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_uaddo_uadde:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10-NEXT: global_store_dwordx2 v[4:5], v[0:1], off
+; GFX10-NEXT: global_store_dword v[6:7], v2, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_uaddo_uadde:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: global_store_b64 v[4:5], v[0:1], off
+; GFX11-NEXT: global_store_b32 v[6:7], v2, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_uaddo_uadde:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-NEXT: s_wait_alu 0xfffd
+; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-NEXT: s_wait_alu 0xfffd
+; GFX12-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off
+; GFX12-NEXT: global_store_b32 v[6:7], v2, off
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %uaddo = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+ %add = extractvalue {i64, i1} %uaddo, 0
+ %of = extractvalue {i64, i1} %uaddo, 1
+ %of32 = select i1 %of, i32 1, i32 0
+ store i64 %add, ptr addrspace(1) %res
+ store i32 %of32, ptr addrspace(1) %carry
+ ret void
+}
+
+declare {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll
new file mode 100644
index 000000000000..1a7ccf083568
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX7 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
+
+define amdgpu_kernel void @fcmp_uniform_select(float %a, i32 %b, i32 %c, ptr addrspace(1) %out) {
+; GFX7-LABEL: fcmp_uniform_select:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x9
+; GFX7-NEXT: s_load_dword s3, s[4:5], 0xb
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: v_cmp_eq_f32_e64 s[4:5], s6, 0
+; GFX7-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5]
+; GFX7-NEXT: s_cselect_b32 s4, 1, 0
+; GFX7-NEXT: s_and_b32 s4, s4, 1
+; GFX7-NEXT: s_cmp_lg_u32 s4, 0
+; GFX7-NEXT: s_cselect_b32 s3, s7, s3
+; GFX7-NEXT: v_mov_b32_e32 v0, s3
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: fcmp_uniform_select:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX8-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x34
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_cmp_eq_f32_e64 s[4:5], s0, 0
+; GFX8-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX8-NEXT: s_cselect_b32 s0, 1, 0
+; GFX8-NEXT: s_and_b32 s0, s0, 1
+; GFX8-NEXT: s_cmp_lg_u32 s0, 0
+; GFX8-NEXT: s_cselect_b32 s0, s1, s6
+; GFX8-NEXT: v_mov_b32_e32 v0, s2
+; GFX8-NEXT: v_mov_b32_e32 v2, s0
+; GFX8-NEXT: v_mov_b32_e32 v1, s3
+; GFX8-NEXT: flat_store_dword v[0:1], v2
+; GFX8-NEXT: s_endpgm
+;
+; GFX11-LABEL: fcmp_uniform_select:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x2
+; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x2c
+; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x34
+; GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_f32_e64 s0, s0, 0
+; GFX11-NEXT: s_cmp_lg_u32 s0, 0
+; GFX11-NEXT: s_cselect_b32 s0, 1, 0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_and_b32 s0, s0, 1
+; GFX11-NEXT: s_cmp_lg_u32 s0, 0
+; GFX11-NEXT: s_cselect_b32 s0, s1, s6
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: global_store_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_endpgm
+ %cmp = fcmp oeq float %a, 0.0
+ %sel = select i1 %cmp, i32 %b, i32 %c
+ store i32 %sel, ptr addrspace(1) %out
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir
new file mode 100644
index 000000000000..67cc0169af61
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir
@@ -0,0 +1,37 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx700 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX7 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GF8 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX11 %s
+
+---
+name: test_copy_scc_vcc
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GFX7-LABEL: name: test_copy_scc_vcc
+ ; GFX7: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GFX7-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[DEF]], [[DEF]], implicit-def $scc
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX7-NEXT: $sgpr0 = COPY [[COPY]]
+ ; GFX7-NEXT: S_ENDPGM 0, implicit $sgpr0
+ ;
+ ; GF8-LABEL: name: test_copy_scc_vcc
+ ; GF8: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GF8-NEXT: S_CMP_LG_U64 [[DEF]], 0, implicit-def $scc
+ ; GF8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GF8-NEXT: $sgpr0 = COPY [[COPY]]
+ ; GF8-NEXT: S_ENDPGM 0, implicit $sgpr0
+ ;
+ ; GFX11-LABEL: name: test_copy_scc_vcc
+ ; GFX11: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; GFX11-NEXT: S_CMP_LG_U32 [[DEF]], 0, implicit-def $scc
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX11-NEXT: $sgpr0 = COPY [[COPY]]
+ ; GFX11-NEXT: S_ENDPGM 0, implicit $sgpr0
+ %0:vcc(s1) = G_IMPLICIT_DEF
+ %1:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %0
+ $sgpr0 = COPY %1
+ S_ENDPGM 0, implicit $sgpr0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir
new file mode 100644
index 000000000000..097372a95746
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir
@@ -0,0 +1,524 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s
+---
+name: add_s16_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: add_s16_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s16)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[ANYEXT]], [[ANYEXT1]]
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ADD]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC2]], [[TRUNC2]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_ADD %2, %3
+ %5:_(s16) = G_AND %4, %4
+...
+
+---
+name: add_s16_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: add_s16_sv
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[COPY2]], [[TRUNC1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr0
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_ADD %2, %3
+ %5:_(s16) = G_AND %4, %4
+...
+
+---
+name: add_s16_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: add_s16_vs
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[COPY2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr0
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_ADD %2, %3
+ %5:_(s16) = G_AND %4, %4
+...
+
+---
+name: add_s16_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: add_s16_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_ADD %2, %3
+ %5:_(s16) = G_AND %4, %4
+...
+
+---
+name: add_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: add_s32_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ADD]], [[ADD]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = G_ADD %0, %1
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: add_s32_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: add_s32_sv
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ADD]], [[ADD]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr0
+ %2:_(s32) = G_ADD %0, %1
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: add_s32_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: add_s32_vs
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ADD]], [[ADD]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr0
+ %2:_(s32) = G_ADD %0, %1
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: add_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: add_s32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ADD]], [[ADD]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = G_ADD %0, %1
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: add_s64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: add_s64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s64) = G_ADD [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 255
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s64) = G_AND [[ADD]], [[ADD]]
+ %0:_(s64) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ADD %0, %1
+ %3:_(s64) = G_CONSTANT i64 255
+ %4:_(s64) = G_AND %2, %2
+...
+
+---
+name: add_s64_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-LABEL: name: add_s64_sv
+ ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s64) = G_ADD [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(s64) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $vgpr0_vgpr1
+ %2:_(s64) = G_ADD %0, %1
+ %3:_(s64) = G_AND %2, %2
+...
+
+---
+name: add_s64_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-LABEL: name: add_s64_vs
+ ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s64) = G_ADD [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $sgpr0_sgpr1
+ %2:_(s64) = G_ADD %0, %1
+ %3:_(s64) = G_AND %2, %2
+...
+
+---
+name: add_s64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: add_s64_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s64) = G_ADD [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_ADD %0, %1
+ %3:_(s64) = G_AND %2, %2
+...
+
+---
+name: uaddo_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: uaddo_s32_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[UADDO1]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[SELECT]], [[UADDO]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32), %3:_(s1) = G_UADDO %0, %1
+ %4:_(s32) = G_ZEXT %3
+ %5:_(s32) = G_AND %4, %2
+...
+
+---
+name: uaddo_s32_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr1
+ ; CHECK-LABEL: name: uaddo_s32_sv
+ ; CHECK: liveins: $sgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDO1]](s1), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDO]], [[SELECT]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32), %3:_(s1) = G_UADDO %0, %1
+ %4:_(s32) = G_ZEXT %3
+ %5:_(s32) = G_AND %2, %4
+...
+
+---
+name: uaddo_s32_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr1
+ ; CHECK-LABEL: name: uaddo_s32_vs
+ ; CHECK: liveins: $vgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDO1]](s1), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDO]], [[SELECT]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32), %3:_(s1) = G_UADDO %0, %1
+ %4:_(s32) = G_ZEXT %3
+ %5:_(s32) = G_AND %2, %4
+...
+
+---
+name: uaddo_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: uaddo_s32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDO1]](s1), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDO]], [[SELECT]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32), %3:_(s1) = G_UADDO %0, %1
+ %4:_(s32) = G_ZEXT %3
+ %5:_(s32) = G_AND %2, %4
+...
+
+---
+name: uadde_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: uadde_s32_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[C]]
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[COPY]], [[COPY1]], [[AND]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE1]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND1]](s32), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE]], [[SELECT]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3
+ %6:_(s32) = G_ZEXT %5
+ %7:_(s32) = G_AND %4, %6
+...
+
+---
+name: uadde_s32_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr1, $sgpr2
+ ; CHECK-LABEL: name: uadde_s32_sv
+ ; CHECK: liveins: $sgpr0, $vgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32)
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY3]], [[COPY1]], [[AMDGPU_COPY_VCC_SCC]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDE1]](s1), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDE]], [[SELECT]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3
+ %6:_(s32) = G_ZEXT %5
+ %7:_(s32) = G_AND %4, %6
+...
+
+---
+name: uadde_s32_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: uadde_s32_vs
+ ; CHECK: liveins: $vgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32)
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY3]], [[AMDGPU_COPY_VCC_SCC]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDE1]](s1), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDE]], [[SELECT]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3
+ %6:_(s32) = G_ZEXT %5
+ %7:_(s32) = G_AND %4, %6
+...
+
+---
+name: uadde_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: uadde_s32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY2]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]]
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY1]], [[ICMP]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDE1]](s1), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UADDE]], [[SELECT]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3
+ %6:_(s32) = G_ZEXT %5
+ %7:_(s32) = G_AND %4, %6
+...
+
+---
+name: uadde_s32_ss_scc_use
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: uadde_s32_ss_scc_use
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[C]]
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[COPY]], [[COPY1]], [[AND]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE1]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND1]](s32), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE]], [[SELECT]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3
+ %6:_(s32) = G_ZEXT %5
+ %8:_(s32) = G_AND %4, %6
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir
index 54ee69fcb220..30c958fcb192 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s
---
name: add_s16_ss
legalized: true
@@ -19,13 +18,13 @@ body: |
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16)
; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[ANYEXT]], [[ANYEXT1]]
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ADD]](s32)
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC2]](s16)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC2]], [[TRUNC2]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s16) = G_TRUNC %0
%3:_(s16) = G_TRUNC %1
%4:_(s16) = G_ADD %2, %3
- S_ENDPGM 0, implicit %4
+ %5:_(s16) = G_AND %4, %4
...
---
@@ -44,13 +43,13 @@ body: |
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16)
; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[COPY2]], [[TRUNC1]]
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s16) = G_TRUNC %0
%3:_(s16) = G_TRUNC %1
%4:_(s16) = G_ADD %2, %3
- S_ENDPGM 0, implicit %4
+ %5:_(s16) = G_AND %4, %4
...
---
@@ -69,13 +68,13 @@ body: |
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16)
; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[COPY2]]
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
%2:_(s16) = G_TRUNC %0
%3:_(s16) = G_TRUNC %1
%4:_(s16) = G_ADD %2, %3
- S_ENDPGM 0, implicit %4
+ %5:_(s16) = G_AND %4, %4
...
---
@@ -93,11 +92,11 @@ body: |
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[TRUNC1]]
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s16) = G_TRUNC %0
%3:_(s16) = G_TRUNC %1
%4:_(s16) = G_ADD %2, %3
- S_ENDPGM 0, implicit %4
+ %5:_(s16) = G_AND %4, %4
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir
index 97018fac13a8..01eb39111b0a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s
---
name: add_v2s16_ss
@@ -18,16 +17,19 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[BITCAST]], [[BITCAST1]]
; CHECK-NEXT: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[LSHR]], [[LSHR1]]
; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ADD]](s32), [[ADD1]](s32)
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR]]
%0:_(<2 x s16>) = COPY $sgpr0
%1:_(<2 x s16>) = COPY $sgpr1
%2:_(<2 x s16>) = G_ADD %0, %1
- S_ENDPGM 0, implicit %2
+ %3:_(s16) = G_CONSTANT i16 255
+ %4:_(<2 x s16>) = G_BUILD_VECTOR %3, %3
+ %5:_(<2 x s16>) = G_AND %2, %4
...
---
@@ -44,11 +46,11 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](<2 x s16>)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ADD]], [[ADD]]
%0:_(<2 x s16>) = COPY $sgpr0
%1:_(<2 x s16>) = COPY $vgpr0
%2:_(<2 x s16>) = G_ADD %0, %1
- S_ENDPGM 0, implicit %2
+ %3:_(<2 x s16>) = G_AND %2, %2
...
---
@@ -65,9 +67,11 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ADD]], [[ADD]]
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $sgpr0
%2:_(<2 x s16>) = G_ADD %0, %1
+ %3:_(<2 x s16>) = G_AND %2, %2
...
---
@@ -83,9 +87,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY]], [[COPY1]]
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](<2 x s16>)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ADD]], [[ADD]]
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = G_ADD %0, %1
- S_ENDPGM 0, implicit %2
+ %3:_(<2 x s16>) = G_AND %2, %2
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
index 7378c9366ec3..e0e783e7a62f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
@@ -77,10 +77,14 @@ body: |
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C3]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
%3:_(s16) = G_SEXT %2
+ %4:_(s16) = G_CONSTANT i16 255
+ %5:_(s16) = G_AND %3, %4
...
---
@@ -215,9 +219,13 @@ body: |
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C3]]
%0:_(s32) = COPY $sgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s16) = G_SEXT %1
+ %3:_(s16) = G_CONSTANT i16 255
+ %4:_(s16) = G_AND %2, %3
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir
index b0199d3ad5cd..e3c01c0e7fcb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir
@@ -1,5 +1,107 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+
+---
+name: sub_s16_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: sub_s16_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s16)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[ANYEXT]], [[ANYEXT1]]
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SUB]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC2]], [[TRUNC2]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_SUB %2, %3
+ %6:_(s16) = G_AND %4, %4
+...
+
+---
+name: sub_s16_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: sub_s16_sv
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s16) = G_SUB [[COPY2]], [[TRUNC1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[SUB]], [[SUB]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr0
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_SUB %2, %3
+ %6:_(s16) = G_AND %4, %4
+...
+
+---
+name: sub_s16_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: sub_s16_vs
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s16) = G_SUB [[TRUNC]], [[COPY2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[SUB]], [[SUB]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr0
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_SUB %2, %3
+ %6:_(s16) = G_AND %4, %4
+...
+
+---
+name: sub_s16_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: sub_s16_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s16) = G_SUB [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[SUB]], [[SUB]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_SUB %2, %3
+ %6:_(s16) = G_AND %4, %4
+...
---
name: sub_s32_ss
@@ -14,9 +116,11 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[SUB]], [[SUB]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_SUB %0, %1
+ %4:_(s32) = G_AND %2, %2
...
---
@@ -33,9 +137,11 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[SUB]], [[SUB]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = G_SUB %0, %1
+ %4:_(s32) = G_AND %2, %2
...
---
@@ -52,9 +158,11 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[SUB]], [[SUB]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
%2:_(s32) = G_SUB %0, %1
+ %4:_(s32) = G_AND %2, %2
...
---
@@ -70,7 +178,376 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[SUB]], [[SUB]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = G_SUB %0, %1
+ %4:_(s32) = G_AND %2, %2
+...
+
+---
+name: sub_v2s16_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: sub_v2s16_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[BITCAST]], [[BITCAST1]]
+ ; CHECK-NEXT: [[SUB1:%[0-9]+]]:sgpr(s32) = G_SUB [[LSHR]], [[LSHR1]]
+ ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SUB]](s32), [[SUB1]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC]]
+ %0:_(<2 x s16>) = COPY $sgpr0
+ %1:_(<2 x s16>) = COPY $sgpr1
+ %2:_(<2 x s16>) = G_SUB %0, %1
+ %5:_(<2 x s16>) = G_AND %2, %2
+...
+
+---
+name: sub_v2s16_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: sub_v2s16_sv
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(<2 x s16>) = G_SUB [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[SUB]], [[SUB]]
+ %0:_(<2 x s16>) = COPY $sgpr0
+ %1:_(<2 x s16>) = COPY $vgpr0
+ %2:_(<2 x s16>) = G_SUB %0, %1
+ %5:_(<2 x s16>) = G_AND %2, %2
+...
+
+---
+name: sub_v2s16_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: sub_v2s16_vs
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(<2 x s16>) = G_SUB [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[SUB]], [[SUB]]
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = COPY $sgpr0
+ %2:_(<2 x s16>) = G_SUB %0, %1
+ %5:_(<2 x s16>) = G_AND %2, %2
+...
+
+---
+name: sub_v2s16_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: sub_v2s16_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(<2 x s16>) = G_SUB [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[SUB]], [[SUB]]
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = COPY $vgpr1
+ %2:_(<2 x s16>) = G_SUB %0, %1
+ %5:_(<2 x s16>) = G_AND %2, %2
+...
+
+---
+name: sub_s64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr0_sgpr1
+ ; CHECK-LABEL: name: sub_s64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s64) = G_SUB [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s64) = G_AND [[SUB]], [[SUB]]
+ %0:_(s64) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr0_sgpr1
+ %2:_(s64) = G_SUB %0, %1
+ %4:_(s64) = G_AND %2, %2
+...
+
+---
+name: sub_s64_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-LABEL: name: sub_s64_sv
+ ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s64) = G_SUB [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(s64) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $vgpr0_vgpr1
+ %2:_(s64) = G_SUB %0, %1
+ %4:_(s64) = G_AND %2, %2
+...
+
+---
+name: sub_s64_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-LABEL: name: sub_s64_vs
+ ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s64) = G_SUB [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $sgpr0_sgpr1
+ %2:_(s64) = G_SUB %0, %1
+ %4:_(s64) = G_AND %2, %2
+...
+
+---
+name: sub_s64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: sub_s64_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s64) = G_SUB [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_SUB %0, %1
+ %4:_(s64) = G_AND %2, %2
+...
+
+---
+name: usubo_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: usubo_s32_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[USUBO:%[0-9]+]]:sgpr(s32), [[USUBO1:%[0-9]+]]:sgpr(s32) = G_USUBO [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[USUBO]], [[USUBO]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32), %3:_(s1) = G_USUBO %0, %1
+ %5:_(s32) = G_AND %2, %2
+...
+
+---
+name: usubo_s32_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr1
+ ; CHECK-LABEL: name: usubo_s32_sv
+ ; CHECK: liveins: $sgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBO]], [[USUBO]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32), %3:_(s1) = G_USUBO %0, %1
+ %5:_(s32) = G_AND %2, %2
+...
+
+---
+name: usubo_s32_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr1
+ ; CHECK-LABEL: name: usubo_s32_vs
+ ; CHECK: liveins: $vgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBO]], [[USUBO]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32), %3:_(s1) = G_USUBO %0, %1
+ %5:_(s32) = G_AND %2, %2
+...
+
+---
+name: usubo_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: usubo_s32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBO]], [[USUBO]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32), %3:_(s1) = G_USUBO %0, %1
+ %5:_(s32) = G_AND %2, %2
+...
+
+---
+name: usube_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: usube_s32_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[C]]
+ ; CHECK-NEXT: [[USUBE:%[0-9]+]]:sgpr(s32), [[USUBE1:%[0-9]+]]:sgpr(s32) = G_USUBE [[COPY]], [[COPY1]], [[AND]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[USUBE]], [[USUBE]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3
+ %7:_(s32) = G_AND %4, %4
+...
+
+---
+name: usube_s32_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr1, $sgpr2
+ ; CHECK-LABEL: name: usube_s32_sv
+ ; CHECK: liveins: $sgpr0, $vgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32)
+ ; CHECK-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY3]], [[COPY1]], [[AMDGPU_COPY_VCC_SCC]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBE]], [[USUBE]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3
+ %7:_(s32) = G_AND %4, %4
+...
+
+---
+name: usube_s32_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: usube_s32_vs
+ ; CHECK: liveins: $vgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32)
+ ; CHECK-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY3]], [[AMDGPU_COPY_VCC_SCC]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBE]], [[USUBE]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3
+ %7:_(s32) = G_AND %4, %4
+...
+
+---
+name: usube_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: usube_s32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY2]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]]
+ ; CHECK-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY1]], [[ICMP]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[USUBE]], [[USUBE]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3
+ %7:_(s32) = G_AND %4, %4
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
index 088c20a3137f..d4baa5fb864f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
@@ -73,10 +73,14 @@ body: |
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C]], [[C1]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C2]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
%3:_(s16) = G_ZEXT %2
+ %4:_(s16) = G_CONSTANT i16 255
+ %5:_(s16) = G_AND %3, %4
...
---
@@ -209,9 +213,13 @@ body: |
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C]], [[C1]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C2]]
%0:_(s32) = COPY $sgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s16) = G_ZEXT %1
+ %3:_(s16) = G_CONSTANT i16 255
+ %4:_(s16) = G_AND %2, %3
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll
new file mode 100644
index 000000000000..8b5958daac16
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll
@@ -0,0 +1,535 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX12 %s
+
+define i16 @s_sub_i16(i16 inreg %a, i16 inreg %b) {
+; GFX7-LABEL: s_sub_i16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_sub_i32 s4, s16, s17
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_sub_i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_sub_i32 s4, s16, s17
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_sub_i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_sub_i32 s4, s16, s17
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_sub_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_sub_i32 s4, s16, s17
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_sub_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_sub_i32 s0, s0, s1
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_sub_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_sub_co_i32 s0, s0, s1
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = sub i16 %a, %b
+ ret i16 %c
+}
+
+define i16 @v_sub_i16(i16 %a, i16 %b) {
+; GFX7-LABEL: v_sub_i16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_sub_i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_sub_u16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_sub_i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_sub_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_sub_nc_u16 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_sub_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_sub_nc_u16 v0.l, v0.l, v1.l
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_sub_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_sub_nc_u16 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = sub i16 %a, %b
+ ret i16 %c
+}
+
+define i32 @s_sub_i32(i32 inreg %a, i32 inreg %b) {
+; GFX7-LABEL: s_sub_i32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_sub_i32 s4, s16, s17
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_sub_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_sub_i32 s4, s16, s17
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_sub_i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_sub_i32 s4, s16, s17
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_sub_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_sub_i32 s4, s16, s17
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_sub_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_sub_i32 s0, s0, s1
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_sub_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_sub_co_i32 s0, s0, s1
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = sub i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @v_sub_i32(i32 %a, i32 %b) {
+; GFX7-LABEL: v_sub_i32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_sub_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_sub_u32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_sub_i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_sub_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_sub_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_sub_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_sub_nc_u32_e32 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = sub i32 %a, %b
+ ret i32 %c
+}
+
+; TODO: Add test for s_sub_v2i16. Instruction selector currently fails
+; to handle G_UNMERGE_VALUES.
+
+define <2 x i16> @v_sub_v2i16(<2 x i16> %a, <2 x i16> %b) {
+; GFX7-LABEL: v_sub_v2i16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_sub_v2i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_sub_i16 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_sub_v2i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_sub_u16_e32 v2, v0, v1
+; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_sub_v2i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_sub_i16 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_sub_v2i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_pk_sub_i16 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_sub_v2i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_pk_sub_i16 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = sub <2 x i16> %a, %b
+ ret <2 x i16> %c
+}
+
+define i64 @s_sub_i64(i64 inreg %a, i64 inreg %b) {
+; GFX7-LABEL: s_sub_i64:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_sub_u32 s4, s16, s18
+; GFX7-NEXT: s_subb_u32 s5, s17, s19
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: v_mov_b32_e32 v1, s5
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_sub_i64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_sub_u32 s4, s16, s18
+; GFX9-NEXT: s_subb_u32 s5, s17, s19
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_sub_i64:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_sub_u32 s4, s16, s18
+; GFX8-NEXT: s_subb_u32 s5, s17, s19
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: v_mov_b32_e32 v1, s5
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_sub_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_sub_u32 s4, s16, s18
+; GFX10-NEXT: s_subb_u32 s5, s17, s19
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_sub_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_sub_u32 s0, s0, s2
+; GFX11-NEXT: s_subb_u32 s1, s1, s3
+; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_sub_i64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[0:1], s[2:3]
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = sub i64 %a, %b
+ ret i64 %c
+}
+
+define i64 @v_sub_i64(i64 %a, i64 %b) {
+; GFX7-LABEL: v_sub_i64:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_sub_i64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
+; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_sub_i64:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_sub_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_sub_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX11-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_sub_i64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX12-NEXT: s_wait_alu 0xfffd
+; GFX12-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = sub i64 %a, %b
+ ret i64 %c
+}
+
+define void @s_usubo_usube(i64 inreg %a, i64 inreg %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) {
+; GFX7-LABEL: s_usubo_usube:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_sub_u32 s4, s16, s18
+; GFX7-NEXT: s_subb_u32 s5, s17, s19
+; GFX7-NEXT: v_mov_b32_e32 v4, s4
+; GFX7-NEXT: s_mov_b32 s6, 0
+; GFX7-NEXT: s_cselect_b32 s8, 1, 0
+; GFX7-NEXT: v_mov_b32_e32 v5, s5
+; GFX7-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64
+; GFX7-NEXT: v_mov_b32_e32 v0, s8
+; GFX7-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_usubo_usube:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_sub_u32 s4, s16, s18
+; GFX9-NEXT: s_subb_u32 s5, s17, s19
+; GFX9-NEXT: v_mov_b32_e32 v4, s4
+; GFX9-NEXT: s_cselect_b32 s6, 1, 0
+; GFX9-NEXT: v_mov_b32_e32 v5, s5
+; GFX9-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX9-NEXT: v_mov_b32_e32 v0, s6
+; GFX9-NEXT: global_store_dword v[2:3], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_usubo_usube:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_sub_u32 s4, s16, s18
+; GFX8-NEXT: s_subb_u32 s5, s17, s19
+; GFX8-NEXT: v_mov_b32_e32 v4, s4
+; GFX8-NEXT: s_cselect_b32 s6, 1, 0
+; GFX8-NEXT: v_mov_b32_e32 v5, s5
+; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
+; GFX8-NEXT: v_mov_b32_e32 v0, s6
+; GFX8-NEXT: flat_store_dword v[2:3], v0
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_usubo_usube:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_sub_u32 s4, s16, s18
+; GFX10-NEXT: s_subb_u32 s5, s17, s19
+; GFX10-NEXT: s_cselect_b32 s6, 1, 0
+; GFX10-NEXT: v_mov_b32_e32 v4, s4
+; GFX10-NEXT: v_mov_b32_e32 v5, s5
+; GFX10-NEXT: v_mov_b32_e32 v6, s6
+; GFX10-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX10-NEXT: global_store_dword v[2:3], v6, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_usubo_usube:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_sub_u32 s0, s0, s2
+; GFX11-NEXT: s_subb_u32 s1, s1, s3
+; GFX11-NEXT: s_cselect_b32 s2, 1, 0
+; GFX11-NEXT: v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0
+; GFX11-NEXT: v_mov_b32_e32 v6, s2
+; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off
+; GFX11-NEXT: global_store_b32 v[2:3], v6, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_usubo_usube:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_sub_co_u32 s0, s0, s2
+; GFX12-NEXT: s_sub_co_ci_u32 s1, s1, s3
+; GFX12-NEXT: s_cselect_b32 s2, 1, 0
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0
+; GFX12-NEXT: v_mov_b32_e32 v6, s2
+; GFX12-NEXT: global_store_b64 v[0:1], v[4:5], off
+; GFX12-NEXT: global_store_b32 v[2:3], v6, off
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %usubo = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+ %sub = extractvalue {i64, i1} %usubo, 0
+ %of = extractvalue {i64, i1} %usubo, 1
+ %of32 = select i1 %of, i32 1, i32 0
+ store i64 %sub, ptr addrspace(1) %res
+ store i32 %of32, ptr addrspace(1) %carry
+ ret void
+}
+
+define void @v_usubo_usube(i64 %a, i64 %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) {
+; GFX7-LABEL: v_usubo_usube:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
+; GFX7-NEXT: s_mov_b32 s6, 0
+; GFX7-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX7-NEXT: buffer_store_dwordx2 v[0:1], v[4:5], s[4:7], 0 addr64
+; GFX7-NEXT: buffer_store_dword v2, v[6:7], s[4:7], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_usubo_usube:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
+; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: global_store_dwordx2 v[4:5], v[0:1], off
+; GFX9-NEXT: global_store_dword v[6:7], v2, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_usubo_usube:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
+; GFX8-NEXT: flat_store_dword v[6:7], v2
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_usubo_usube:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10-NEXT: global_store_dwordx2 v[4:5], v[0:1], off
+; GFX10-NEXT: global_store_dword v[6:7], v2, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_usubo_usube:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: global_store_b64 v[4:5], v[0:1], off
+; GFX11-NEXT: global_store_b32 v[6:7], v2, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_usubo_usube:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX12-NEXT: s_wait_alu 0xfffd
+; GFX12-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-NEXT: s_wait_alu 0xfffd
+; GFX12-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off
+; GFX12-NEXT: global_store_b32 v[6:7], v2, off
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %usubo = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+ %sub = extractvalue {i64, i1} %usubo, 0
+ %of = extractvalue {i64, i1} %usubo, 1
+ %of32 = select i1 %of, i32 1, i32 0
+ store i64 %sub, ptr addrspace(1) %res
+ store i32 %of32, ptr addrspace(1) %carry
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
index aa591d28eb34..c1f3a12dba57 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
@@ -591,3 +591,24 @@ exit:
store i32 %ballot, ptr addrspace(1) %out
ret void
}
+
+define amdgpu_cs i32 @compare_bfloats(bfloat %x, bfloat %y) {
+; GFX10-LABEL: compare_bfloats:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX10-NEXT: v_cmp_gt_f32_e64 s0, v0, v1
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: compare_bfloats:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX11-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX11-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX11-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX11-NEXT: v_cmp_gt_f32_e64 s0, v1, v2
+; GFX11-NEXT: ; return to shader part epilog
+ %cmp = fcmp ogt bfloat %x, %y
+ %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
+ ret i32 %ballot
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll
index 30c2c260a327..827a01ff33d0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll
@@ -557,3 +557,15 @@ exit:
store i64 %ballot, ptr addrspace(1) %out
ret void
}
+
+define amdgpu_cs i64 @compare_bfloats(bfloat %x, bfloat %y) {
+; CHECK-LABEL: compare_bfloats:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; CHECK-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; CHECK-NEXT: v_cmp_gt_f32_e64 s[0:1], v0, v1
+; CHECK-NEXT: ; return to shader part epilog
+ %cmp = fcmp ogt bfloat %x, %y
+ %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
+ ret i64 %ballot
+}
diff --git a/llvm/test/CodeGen/ARM/strict-fp-func.ll b/llvm/test/CodeGen/ARM/strict-fp-func.ll
new file mode 100644
index 000000000000..39bb2b46bdac
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/strict-fp-func.ll
@@ -0,0 +1,13 @@
+; RUN: llc -mtriple arm-none-eabi -stop-after=finalize-isel %s -o - | FileCheck %s
+
+define float @func_02(float %x, float %y) strictfp nounwind {
+ %call = call float @func_01(float %x) strictfp
+ %res = call float @llvm.experimental.constrained.fadd.f32(float %call, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") strictfp
+ ret float %res
+}
+; CHECK-LABEL: name: func_02
+; CHECK: BL @func_01, {{.*}}, implicit-def $fpscr_rm
+
+
+declare float @func_01(float)
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
diff --git a/llvm/test/CodeGen/RISCV/rv64-stackmap.ll b/llvm/test/CodeGen/RISCV/rv64-stackmap.ll
index d07f608bf789..c3183a1a3e03 100644
--- a/llvm/test/CodeGen/RISCV/rv64-stackmap.ll
+++ b/llvm/test/CodeGen/RISCV/rv64-stackmap.ll
@@ -290,9 +290,9 @@ define void @liveConstant() {
; CHECK-NEXT: .half 2
; CHECK-NEXT: .half 0
; CHECK-NEXT: .word
-define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27) {
+define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i8 %l25, i16 zeroext %l26, i32 signext %l27) {
entry:
- call void (i64, i32, ptr, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 28, ptr null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27)
+ call void (i64, i32, ptr, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 28, ptr null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i8 %l25, i16 %l26, i32 %l27)
ret void
}
diff --git a/llvm/test/CodeGen/RISCV/rv64p.ll b/llvm/test/CodeGen/RISCV/rv64p.ll
index cb07f945a582..f937f44f1332 100644
--- a/llvm/test/CodeGen/RISCV/rv64p.ll
+++ b/llvm/test/CodeGen/RISCV/rv64p.ll
@@ -297,8 +297,7 @@ declare i32 @llvm.abs.i32(i32, i1 immarg)
define i32 @abs_i32(i32 %x) {
; CHECK-LABEL: abs_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: sext.w a0, a0
-; CHECK-NEXT: abs a0, a0
+; CHECK-NEXT: absw a0, a0
; CHECK-NEXT: ret
%abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
ret i32 %abs
@@ -307,8 +306,7 @@ define i32 @abs_i32(i32 %x) {
define signext i32 @abs_i32_sext(i32 signext %x) {
; CHECK-LABEL: abs_i32_sext:
; CHECK: # %bb.0:
-; CHECK-NEXT: abs a0, a0
-; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: absw a0, a0
; CHECK-NEXT: ret
%abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
ret i32 %abs
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s
index d85ea799ed3d..399a6441629c 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s
@@ -1,8 +1,8 @@
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding -comment-column=0 %s | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-ASM %s
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | sed -n 's#.*\(\[0x[0-9a-fx,]\{1,\}\]\)#\1#p' | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding -comment-column=0 | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding -comment-column=0 | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding -comment-column=0 %s | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-ASM %s
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | sed -n 's#.*\(\[0x[0-9a-fx,]\{1,\}\]\)#\1#p' | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding -comment-column=0 | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding -comment-column=0 | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s
v_bfrev_b32_e32 v5, v1
// GFX12: v_bfrev_b32_e32 v5, v1 ; encoding: [0x01,0x71,0x0a,0x7e]
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll
new file mode 100644
index 000000000000..9cc417f6b874
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll
@@ -0,0 +1,89 @@
+; RUN: split-file %s %t
+; RUN: cat %t/main.ll %t/probable-or.prof > %t/probable-or.ll
+; RUN: cat %t/main.ll %t/probable-and.prof > %t/probable-and.ll
+; RUN: opt -passes='loop(simple-loop-unswitch<nontrivial>)' -S %t/probable-or.ll -o -| FileCheck %t/probable-or.prof
+; RUN: opt -passes='loop(simple-loop-unswitch<nontrivial>)' -S %t/probable-and.ll -o -| FileCheck %t/probable-and.prof
+
+;--- main.ll
+declare i32 @a()
+declare i32 @b()
+
+define i32 @or(ptr %ptr, i1 %cond) !prof !0 {
+entry:
+ br label %loop_begin
+
+loop_begin:
+ %v1 = load i1, ptr %ptr
+ %cond_or = or i1 %v1, %cond
+ br i1 %cond_or, label %loop_a, label %loop_b, !prof !1
+
+loop_a:
+ call i32 @a()
+ br label %latch
+
+loop_b:
+ call i32 @b()
+ br label %latch
+
+latch:
+ %v2 = load i1, ptr %ptr
+ br i1 %v2, label %loop_begin, label %loop_exit, !prof !2
+
+loop_exit:
+ ret i32 0
+}
+
+define i32 @and(ptr %ptr, i1 %cond) !prof !0 {
+entry:
+ br label %loop_begin
+
+loop_begin:
+ %v1 = load i1, ptr %ptr
+ %cond_and = and i1 %v1, %cond
+ br i1 %cond_and, label %loop_a, label %loop_b, !prof !1
+
+loop_a:
+ call i32 @a()
+ br label %latch
+
+loop_b:
+ call i32 @b()
+ br label %latch
+
+latch:
+ %v2 = load i1, ptr %ptr
+ br i1 %v2, label %loop_begin, label %loop_exit, !prof !2
+
+loop_exit:
+ ret i32 0
+}
+
+;--- probable-or.prof
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 1, i32 1000}
+!2 = !{!"branch_weights", i32 5, i32 7}
+; CHECK-LABEL: @or
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %cond.fr = freeze i1 %cond
+; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split, !prof !1
+; CHECK-LABEL: @and
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %cond.fr = freeze i1 %cond
+; CHECK-NEXT: br i1 %cond.fr, label %entry.split, label %entry.split.us, !prof !3
+; CHECK: !1 = !{!"branch_weights", i32 1, i32 1000}
+; CHECK: !3 = !{!"unknown", !"simple-loop-unswitch"}
+
+;--- probable-and.prof
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 1000, i32 1}
+!2 = !{!"branch_weights", i32 5, i32 7}
+; CHECK-LABEL: @or
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %cond.fr = freeze i1 %cond
+; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split, !prof !1
+; CHECK-LABEL: @and
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %cond.fr = freeze i1 %cond
+; CHECK-NEXT: br i1 %cond.fr, label %entry.split, label %entry.split.us, !prof !3
+; CHECK: !1 = !{!"unknown", !"simple-loop-unswitch"}
+; CHECK: !3 = !{!"branch_weights", i32 1000, i32 1}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll b/llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll
index 0964c55d1dec..3760be4b26f2 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt < %s -simple-loop-unswitch-inject-invariant-conditions=true -passes='loop(simple-loop-unswitch<nontrivial>,loop-instsimplify)' -S | FileCheck %s
define void @test() {
@@ -7,7 +7,7 @@ define void @test() {
; CHECK-NEXT: [[TMP:%.*]] = call i1 @llvm.experimental.widenable.condition()
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) poison unordered, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, ptr addrspace(1) poison unordered, align 8
-; CHECK-NEXT: br i1 [[TMP]], label [[BB_SPLIT:%.*]], label [[BB3_SPLIT_US:%.*]]
+; CHECK-NEXT: br i1 [[TMP]], label [[BB_SPLIT:%.*]], label [[BB3_SPLIT_US:%.*]], !prof [[PROF0:![0-9]+]]
; CHECK: bb.split:
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb3:
@@ -19,7 +19,7 @@ define void @test() {
; CHECK-NEXT: [[TMP6_US:%.*]] = phi i32 [ poison, [[BB3_SPLIT_US]] ]
; CHECK-NEXT: [[TMP7_US:%.*]] = add nuw nsw i32 [[TMP6_US]], 2
; CHECK-NEXT: [[TMP8_US:%.*]] = icmp ult i32 [[TMP7_US]], [[TMP2]]
-; CHECK-NEXT: br i1 [[TMP8_US]], label [[BB9_US:%.*]], label [[BB16_SPLIT_US:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP8_US]], label [[BB9_US:%.*]], label [[BB16_SPLIT_US:%.*]], !prof [[PROF0]]
; CHECK: bb9.us:
; CHECK-NEXT: br label [[BB17_SPLIT_US:%.*]]
; CHECK: bb16.split.us:
@@ -96,3 +96,8 @@ declare i1 @llvm.experimental.widenable.condition()
!0 = !{!"branch_weights", i32 1048576, i32 1}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(inaccessiblemem: readwrite) }
+;.
+; CHECK: [[PROF0]] = !{!"branch_weights", i32 1048576, i32 1}
+;.
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll b/llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll
new file mode 100644
index 000000000000..ec6baa5b3772
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll
@@ -0,0 +1,157 @@
+; RUN: split-file %s %t
+; RUN: cat %t/main.ll %t/probable-or.prof > %t/probable-or.ll
+; RUN: cat %t/main.ll %t/probable-and.prof > %t/probable-and.ll
+; RUN: opt -passes='loop-mssa(simple-loop-unswitch)' -S %t/probable-or.ll -o - | FileCheck %t/probable-or.prof
+; RUN: opt -passes='loop-mssa(simple-loop-unswitch)' -S %t/probable-and.ll -o - | FileCheck %t/probable-and.prof
+;
+; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \
+; RUN: %t/probable-or.ll -disable-output -simple-loop-unswitch-estimate-profile=0 2>&1 | FileCheck %t/probable-or.prof --check-prefixes=PROFILE-COM,PROFILE-REF
+
+; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \
+; RUN: %t/probable-or.ll -disable-output -simple-loop-unswitch-estimate-profile=1 2>&1 | FileCheck %t/probable-or.prof --check-prefixes=PROFILE-COM,PROFILE-CHK
+
+; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \
+; RUN: %t/probable-and.ll -disable-output -simple-loop-unswitch-estimate-profile=0 2>&1 | FileCheck %t/probable-and.prof --check-prefixes=PROFILE-COM,PROFILE-REF
+
+; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \
+; RUN: %t/probable-and.ll -disable-output -simple-loop-unswitch-estimate-profile=1 2>&1 | FileCheck %t/probable-and.prof --check-prefixes=PROFILE-COM,PROFILE-CHK
+
+;--- main.ll
+declare void @some_func() noreturn
+
+define i32 @or(i1 %cond1, i32 %var1) !prof !0 {
+entry:
+ br label %loop_begin
+
+loop_begin:
+ %var3 = phi i32 [%var1, %entry], [%var2, %do_something]
+ %cond2 = icmp eq i32 %var3, 10
+ %cond.or = or i1 %cond1, %cond2
+ br i1 %cond.or, label %loop_exit, label %do_something, !prof !1
+
+do_something:
+ %var2 = add i32 %var3, 1
+ call void @some_func() noreturn nounwind
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+define i32 @and(i1 %cond1, i32 %var1) !prof !0 {
+entry:
+ br label %loop_begin
+
+loop_begin:
+ %var3 = phi i32 [%var1, %entry], [%var2, %do_something]
+ %cond2 = icmp eq i32 %var3, 10
+ %cond.and = and i1 %cond1, %cond2
+ br i1 %cond.and, label %do_something, label %loop_exit, !prof !1
+
+do_something:
+ %var2 = add i32 %var3, 1
+ call void @some_func() noreturn nounwind
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+;--- probable-or.prof
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 1, i32 1000}
+; CHECK-LABEL: @or
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %cond1.fr = freeze i1 %cond1
+; CHECK-NEXT: br i1 %cond1.fr, label %loop_exit.split, label %entry.split, !prof !1
+; CHECK-LABEL: @and
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %cond1.fr = freeze i1 %cond1
+; CHECK-NEXT: br i1 %cond1.fr, label %entry.split, label %loop_exit.split, !prof !2
+; CHECK: !1 = !{!"branch_weights", i32 1, i32 1000}
+; CHECK: !2 = !{!"unknown", !"simple-loop-unswitch"}
+
+; PROFILE-COM: Printing analysis results of BFI for function 'or':
+; PROFILE-COM: block-frequency-info: or
+ ; PROFILE-COM: - entry: {{.*}} count = 10
+ ; PROFILE-COM: - loop_begin: {{.*}} count = 10010
+ ; PROFILE-COM: - do_something: {{.*}} count = 10000
+ ; PROFILE-COM: - loop_exit: {{.*}} count = 10
+
+; PROFILE-COM: Printing analysis results of BFI for function 'and':
+; PROFILE-COM: block-frequency-info: and
+ ; PROFILE-COM: - entry: {{.*}} count = 10
+ ; PROFILE-COM: - loop_begin: {{.*}} count = 10
+ ; PROFILE-COM: - do_something: {{.*}} count = 0
+ ; PROFILE-COM: - loop_exit: {{.*}} count = 10
+
+; PROFILE-COM: Printing analysis results of BFI for function 'or':
+; PROFILE-COM: block-frequency-info: or
+ ; PROFILE-COM: - entry: {{.*}} count = 10
+ ; PROFILE-REF: - entry.split: {{.*}} count = 5
+ ; PROFILE-CHK: - entry.split: {{.*}} count = 10
+ ; PROFILE-REF: - loop_begin: {{.*}} count = 5005
+ ; PROFILE-CHK: - loop_begin: {{.*}} count = 10000
+ ; PROFILE-REF: - do_something: {{.*}} count = 5000
+ ; PROFILE-CHK: - do_something: {{.*}} count = 9990
+ ; PROFILE-REF: - loop_exit: {{.*}} count = 5
+ ; PROFILE-CHK: - loop_exit: {{.*}} count = 10
+ ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10
+
+; PROFILE-COM: Printing analysis results of BFI for function 'and':
+; PROFILE-COM: block-frequency-info: and
+ ; PROFILE-COM: - entry: {{.*}} count = 10
+ ; PROFILE-COM: - entry.split: {{.*}} count = 5
+ ; PROFILE-COM: - loop_begin: {{.*}} count = 5
+ ; PROFILE-COM: - do_something: {{.*}} count = 0
+ ; PROFILE-COM: - loop_exit: {{.*}} count = 5
+ ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10
+
+;--- probable-and.prof
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 1000, i32 1}
+; CHECK-LABEL: @or
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %cond1.fr = freeze i1 %cond1
+; CHECK-NEXT: br i1 %cond1.fr, label %loop_exit.split, label %entry.split, !prof !1
+; CHECK-LABEL: @and
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %cond1.fr = freeze i1 %cond1
+; CHECK-NEXT: br i1 %cond1.fr, label %entry.split, label %loop_exit.split, !prof !2
+; CHECK: !1 = !{!"unknown", !"simple-loop-unswitch"}
+; CHECK: !2 = !{!"branch_weights", i32 1000, i32 1}
+; PROFILE-COM: Printing analysis results of BFI for function 'or':
+; PROFILE-COM: block-frequency-info: or
+ ; PROFILE-COM: - entry: {{.*}}, count = 10
+ ; PROFILE-COM: - loop_begin: {{.*}}, count = 10
+ ; PROFILE-COM: - do_something: {{.*}}, count = 0
+ ; PROFILE-COM: - loop_exit: {{.*}}, count = 10
+
+; PROFILE-COM: Printing analysis results of BFI for function 'and':
+; PROFILE-COM: block-frequency-info: and
+ ; PROFILE-COM: - entry: {{.*}} count = 10
+ ; PROFILE-COM: - loop_begin: {{.*}} count = 10010
+ ; PROFILE-COM: - do_something: {{.*}} count = 10000
+ ; PROFILE-COM: - loop_exit: {{.*}} count = 10
+
+; PROFILE-COM: Printing analysis results of BFI for function 'or':
+; PROFILE-COM: block-frequency-info: or
+ ; PROFILE-COM: - entry: {{.*}} count = 10
+ ; PROFILE-COM: - entry.split: {{.*}} count = 5
+ ; PROFILE-COM: - loop_begin: {{.*}} count = 5
+ ; PROFILE-COM: - do_something: {{.*}} count = 0
+ ; PROFILE-COM: - loop_exit: {{.*}} count = 5
+ ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10
+
+; PROFILE-COM: Printing analysis results of BFI for function 'and':
+; PROFILE-COM: block-frequency-info: and
+ ; PROFILE-COM: - entry: {{.*}} count = 10
+ ; PROFILE-REF: - entry.split: {{.*}} count = 5
+ ; PROFILE-CHK: - entry.split: {{.*}} count = 10
+ ; PROFILE-REF: - loop_begin: {{.*}} count = 5005
+ ; PROFILE-CHK: - loop_begin: {{.*}} count = 10000
+ ; PROFILE-REF: - do_something: {{.*}} count = 5000
+ ; PROFILE-CHK: - do_something: {{.*}} count = 9990
+ ; PROFILE-REF: - loop_exit: {{.*}} count = 5
+ ; PROFILE-CHK: - loop_exit: {{.*}} count = 10
+ ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10
diff --git a/llvm/unittests/ADT/TypeSwitchTest.cpp b/llvm/unittests/ADT/TypeSwitchTest.cpp
index a7d934265c5f..b80122837c1a 100644
--- a/llvm/unittests/ADT/TypeSwitchTest.cpp
+++ b/llvm/unittests/ADT/TypeSwitchTest.cpp
@@ -142,3 +142,44 @@ TEST(TypeSwitchTest, DefaultUnreachableWithVoid) {
EXPECT_DEATH((void)translate(DerivedD()), "Unhandled type");
#endif
}
+
+TEST(TypeSwitchTest, DefaultNullopt) {
+ auto translate = [](auto value) {
+ return TypeSwitch<Base *, std::optional<int>>(&value)
+ .Case([](DerivedA *) { return 0; })
+ .Default(std::nullopt);
+ };
+ EXPECT_EQ(0, translate(DerivedA()));
+ EXPECT_EQ(std::nullopt, translate(DerivedD()));
+}
+
+TEST(TypeSwitchTest, DefaultNullptr) {
+ float foo = 0.0f;
+ auto translate = [&](auto value) {
+ return TypeSwitch<Base *, float *>(&value)
+ .Case([&](DerivedA *) { return &foo; })
+ .Default(nullptr);
+ };
+ EXPECT_EQ(&foo, translate(DerivedA()));
+ EXPECT_EQ(nullptr, translate(DerivedD()));
+}
+
+TEST(TypeSwitchTest, DefaultNullptrForPointerLike) {
+ struct Value {
+ void *ptr;
+ Value(const Value &other) : ptr(other.ptr) {}
+ Value(std::nullptr_t) : ptr(nullptr) {}
+ Value() : Value(nullptr) {}
+ };
+
+ float foo = 0.0f;
+ Value fooVal;
+ fooVal.ptr = &foo;
+ auto translate = [&](auto value) {
+ return TypeSwitch<Base *, Value>(&value)
+ .Case([&](DerivedA *) { return fooVal; })
+ .Default(nullptr);
+ };
+ EXPECT_EQ(&foo, translate(DerivedA()).ptr);
+ EXPECT_EQ(nullptr, translate(DerivedD()).ptr);
+}
diff --git a/llvm/utils/lit/tests/Inputs/shtest-readfile/env.txt b/llvm/utils/lit/tests/Inputs/shtest-readfile/env.txt
index c790b687c436..3e1937375497 100644
--- a/llvm/utils/lit/tests/Inputs/shtest-readfile/env.txt
+++ b/llvm/utils/lit/tests/Inputs/shtest-readfile/env.txt
@@ -1,6 +1,6 @@
## Tests that readfile works with the env builtin.
# RUN: echo -n "hello" > %t.1
-# RUN: env TEST=%{readfile:%t.1} python3 -c "import os; print(os.environ['TEST'])"
+# RUN: env TEST=%{readfile:%t.1} %{python} -c "import os; print(os.environ['TEST'])"
## Fail the test so we can assert on the output.
# RUN: not echo return \ No newline at end of file
diff --git a/llvm/utils/lit/tests/Inputs/shtest-readfile/lit.cfg b/llvm/utils/lit/tests/Inputs/shtest-readfile/lit.cfg
index ee496674fdb6..80af27f57d35 100644
--- a/llvm/utils/lit/tests/Inputs/shtest-readfile/lit.cfg
+++ b/llvm/utils/lit/tests/Inputs/shtest-readfile/lit.cfg
@@ -10,6 +10,7 @@ use_lit_shell = lit.util.pythonize_bool(lit_shell_env)
config.test_format = lit.formats.ShTest(execute_external=not use_lit_shell)
config.test_source_root = None
config.test_exec_root = None
+config.substitutions.append(("%{python}", '"%s"' % (sys.executable)))
# If we are testing with the external shell, remove the fake-externals from
# PATH so that we use mkdir in the tests.
diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_unlimited.txt b/llvm/utils/lit/tests/Inputs/shtest-ulimit-nondarwin/ulimit_unlimited.txt
index b8aa3d507171..4c687e306186 100644
--- a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_unlimited.txt
+++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit-nondarwin/ulimit_unlimited.txt
@@ -1,6 +1,6 @@
# RUN: ulimit -f 5
-# RUN: %{python} %S/print_limits.py
+# RUN: %{python} %S/../shtest-ulimit/print_limits.py
# RUN: ulimit -f unlimited
-# RUN: %{python} %S/print_limits.py
+# RUN: %{python} %S/../shtest-ulimit/print_limits.py
# Fail the test so that we can assert on the output.
# RUN: not echo return
diff --git a/llvm/utils/lit/tests/shtest-readfile.py b/llvm/utils/lit/tests/shtest-readfile.py
index be3915a1608b..218da2257bcf 100644
--- a/llvm/utils/lit/tests/shtest-readfile.py
+++ b/llvm/utils/lit/tests/shtest-readfile.py
@@ -12,7 +12,7 @@
# CHECK: # executed command: echo '%{readfile:[[TEMP_PATH]]{{[\\\/]}}absolute-paths.txt.tmp}'
# CHECK-LABEL: FAIL: shtest-readfile :: env.txt ({{[^)]*}})
-# CHECK: env TEST=hello python3 -c "import os; print(os.environ['TEST'])"
+# CHECK: env TEST=hello {{.*}} -c "import os; print(os.environ['TEST'])"
# CHECK: # | hello
# CHECK-LABEL: FAIL: shtest-readfile :: file-does-not-exist.txt ({{[^)]*}})
diff --git a/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py b/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py
index 022e8b5f4189..893270ec68f6 100644
--- a/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py
+++ b/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py
@@ -6,10 +6,16 @@
# RUN: not %{lit} -a -v %{inputs}/shtest-ulimit-nondarwin | FileCheck %s
-# CHECK: -- Testing: 1 tests{{.*}}
+# CHECK: -- Testing: 2 tests{{.*}}
# CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_okay.txt ({{[^)]*}})
# CHECK: ulimit -v 1048576
# CHECK: ulimit -s 256
# CHECK: RLIMIT_AS=1073741824
# CHECK: RLIMIT_STACK=262144
+
+# CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_unlimited.txt ({{[^)]*}})
+# CHECK: ulimit -f 5
+# CHECK: RLIMIT_FSIZE=5
+# CHECK: ulimit -f unlimited
+# CHECK: RLIMIT_FSIZE=-1
diff --git a/llvm/utils/lit/tests/shtest-ulimit.py b/llvm/utils/lit/tests/shtest-ulimit.py
index e15e19092030..21e5a5e2491d 100644
--- a/llvm/utils/lit/tests/shtest-ulimit.py
+++ b/llvm/utils/lit/tests/shtest-ulimit.py
@@ -11,7 +11,7 @@
# RUN: not %{lit} -a -v %{inputs}/shtest-ulimit --order=lexical \
# RUN: | FileCheck -DBASE_NOFILE_LIMIT=%{readfile:%t.nofile_limit} %s
-# CHECK: -- Testing: 4 tests{{.*}}
+# CHECK: -- Testing: 3 tests{{.*}}
# CHECK-LABEL: FAIL: shtest-ulimit :: ulimit-bad-arg.txt ({{[^)]*}})
# CHECK: ulimit -n
@@ -25,9 +25,3 @@
# CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_reset.txt ({{[^)]*}})
# CHECK: RLIMIT_NOFILE=[[BASE_NOFILE_LIMIT]]
-
-# CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_unlimited.txt ({{[^)]*}})
-# CHECK: ulimit -f 5
-# CHECK: RLIMIT_FSIZE=5
-# CHECK: ulimit -f unlimited
-# CHECK: RLIMIT_FSIZE=-1
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
index f3674c3eecfe..ecd036d452b2 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
@@ -293,10 +293,6 @@ def MapOp : LinalgStructuredBase_Op<"map", [
// Implement functions necessary for DestinationStyleOpInterface.
MutableOperandRange getDpsInitsMutable() { return getInitMutable(); }
- SmallVector<OpOperand *> getOpOperandsMatchingBBargs() {
- return getDpsInputOperands();
- }
-
bool payloadUsesValueFromOperand(OpOperand * opOperand) {
if (isDpsInit(opOperand)) return false;
return !getMatchingBlockArgument(opOperand).use_empty();
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index cbc565b0c8cb..3dc45edf4a23 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -1474,6 +1474,8 @@ void MapOp::getAsmBlockArgumentNames(Region &region,
OpAsmSetValueNameFn setNameFn) {
for (Value v : getRegionInputArgs())
setNameFn(v, "in");
+ for (Value v : getRegionOutputArgs())
+ setNameFn(v, "init");
}
void MapOp::getAsmResultNames(function_ref<void(Value, StringRef)> setNameFn) {
@@ -1495,14 +1497,14 @@ void MapOp::build(
if (bodyBuild)
buildGenericRegion(builder, result.location, *result.regions.front(),
- inputs, /*outputs=*/{}, bodyBuild);
+ inputs, /*outputs=*/{init}, bodyBuild);
}
static void addBodyWithPayloadOp(OpAsmParser &parser, OperationState &result,
const OperationName &payloadOpName,
const NamedAttrList &payloadOpAttrs,
ArrayRef<Value> operands,
- bool initFirst = false) {
+ bool initFirst = false, bool mapInit = true) {
OpBuilder b(parser.getContext());
Region *body = result.addRegion();
Block &block = body->emplaceBlock();
@@ -1516,12 +1518,13 @@ static void addBodyWithPayloadOp(OpAsmParser &parser, OperationState &result,
// If initFirst flag is enabled, we consider init as the first position of
// payload operands.
if (initFirst) {
- payloadOpOperands.push_back(block.getArguments().back());
+ if (mapInit)
+ payloadOpOperands.push_back(block.getArguments().back());
for (const auto &arg : block.getArguments().drop_back())
payloadOpOperands.push_back(arg);
} else {
payloadOpOperands = {block.getArguments().begin(),
- block.getArguments().end()};
+ block.getArguments().end() - int(!mapInit)};
}
Operation *payloadOp = b.create(
@@ -1553,8 +1556,8 @@ ParseResult MapOp::parse(OpAsmParser &parser, OperationState &result) {
if (payloadOpName.has_value()) {
if (!result.operands.empty())
addBodyWithPayloadOp(parser, result, payloadOpName.value(),
- payloadOpAttrs,
- ArrayRef(result.operands).drop_back());
+ payloadOpAttrs, ArrayRef(result.operands), false,
+ false);
else
result.addRegion();
} else {
@@ -1570,7 +1573,11 @@ ParseResult MapOp::parse(OpAsmParser &parser, OperationState &result) {
return success();
}
-static bool canUseShortForm(Block *body, bool initFirst = false) {
+static bool canUseShortForm(Block *body, bool initFirst = false,
+ bool mapInit = true) {
+ // `intFirst == true` implies that we want to map init arg
+ if (initFirst && !mapInit)
+ return false;
// Check if the body can be printed in short form. The following 4 conditions
// must be satisfied:
@@ -1582,7 +1589,7 @@ static bool canUseShortForm(Block *body, bool initFirst = false) {
// 2) The payload op must have the same number of operands as the number of
// block arguments.
if (payload.getNumOperands() == 0 ||
- payload.getNumOperands() != body->getNumArguments())
+ payload.getNumOperands() != body->getNumArguments() - int(!mapInit))
return false;
// 3) If `initFirst` is true (e.g., for reduction ops), the init block
@@ -1600,7 +1607,8 @@ static bool canUseShortForm(Block *body, bool initFirst = false) {
}
} else {
for (const auto &[operand, bbArg] :
- llvm::zip(payload.getOperands(), body->getArguments())) {
+ llvm::zip(payload.getOperands(),
+ body->getArguments().drop_back(int(!mapInit)))) {
if (bbArg != operand)
return false;
}
@@ -1632,7 +1640,8 @@ static void printShortForm(OpAsmPrinter &p, Operation *payloadOp) {
void MapOp::print(OpAsmPrinter &p) {
Block *mapper = getBody();
- bool useShortForm = canUseShortForm(mapper);
+ bool useShortForm =
+ canUseShortForm(mapper, /*initFirst=*/false, /*mapInit*/ false);
if (useShortForm) {
printShortForm(p, &mapper->getOperations().front());
}
@@ -1658,11 +1667,13 @@ LogicalResult MapOp::verify() {
auto *bodyBlock = getBody();
auto blockArgs = bodyBlock->getArguments();
- // Checks if the number of `inputs` match the arity of the `mapper` region.
- if (getInputs().size() != blockArgs.size())
+ // Checks if the number of `inputs` + `init` match the arity of the `mapper`
+ // region.
+ if (getInputs().size() + 1 != blockArgs.size())
return emitOpError() << "expects number of operands to match the arity of "
"mapper, but got: "
- << getInputs().size() << " and " << blockArgs.size();
+ << getInputs().size() + 1 << " and "
+ << blockArgs.size();
// The parameters of mapper should all match the element type of inputs.
for (const auto &[bbArgType, inputArg] :
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp
index 3e31393fd51e..75bb1757a55f 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp
@@ -31,10 +31,8 @@ using namespace mlir;
using namespace mlir::linalg;
static LogicalResult generalizeNamedOpPrecondition(LinalgOp linalgOp) {
- // Bailout if `linalgOp` is already a generic or a linalg.map. We cannot
- // trivially generalize a `linalg.map`, as it does not use the output as
- // region arguments in the block.
- if (isa<GenericOp>(linalgOp) || isa<MapOp>(linalgOp))
+ // Bailout if `linalgOp` is already a generic.
+ if (isa<GenericOp>(linalgOp))
return failure();
// Check if the operation has exactly one region.
if (linalgOp->getNumRegions() != 1) {
diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
index bce964e47a3b..c607ece418df 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -579,6 +579,7 @@ static Value lowerGenerateLikeOpBody(RewriterBase &rewriter, Location loc,
linalg::MapOp::create(rewriter, loc, tensorType, /*inputs=*/ValueRange(),
/*init=*/tensorDestination);
Block &linalgBody = linalgOp.getMapper().emplaceBlock();
+ linalgBody.addArgument(tensorType.getElementType(), loc);
// Create linalg::IndexOps.
rewriter.setInsertionPointToStart(&linalgBody);
@@ -1068,6 +1069,7 @@ struct SplatOpInterface
/*inputs=*/ValueRange(),
/*init=*/*tensorAlloc);
Block &linalgBody = linalgOp.getMapper().emplaceBlock();
+ linalgBody.addArgument(tensorType.getElementType(), loc);
// Create linalg::IndexOps.
rewriter.setInsertionPointToStart(&linalgBody);
diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir
index 26d2d98572f4..f4020ede4854 100644
--- a/mlir/test/Dialect/Linalg/canonicalize.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize.mlir
@@ -1423,7 +1423,7 @@ func.func @transpose_buffer(%input: memref<?xf32>,
func.func @recursive_effect(%arg : tensor<1xf32>) {
%init = arith.constant dense<0.0> : tensor<1xf32>
%mapped = linalg.map ins(%arg:tensor<1xf32>) outs(%init :tensor<1xf32>)
- (%in : f32) {
+ (%in : f32, %out: f32) {
vector.print %in : f32
linalg.yield %in : f32
}
diff --git a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir
index ae07b1b82228..dcdd6c8db4b2 100644
--- a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir
@@ -386,18 +386,24 @@ func.func @generalize_batch_reduce_gemm_bf16(%lhs: memref<7x8x9xbf16>, %rhs: mem
// -----
-// CHECK-LABEL: generalize_linalg_map
-func.func @generalize_linalg_map(%arg0: memref<1x8x8x8xf32>) {
+func.func @generalize_linalg_map(%arg0: memref<1x8x8x8xf32>, %arg1: memref<1x8x8x8xf32>, %arg2: memref<1x8x8x8xf32>) {
%cst = arith.constant 0.000000e+00 : f32
- // CHECK: linalg.map
- // CHECK-NOT: linalg.generic
- linalg.map outs(%arg0 : memref<1x8x8x8xf32>)
- () {
- linalg.yield %cst : f32
- }
+ linalg.map {arith.addf} ins(%arg0, %arg1: memref<1x8x8x8xf32>, memref<1x8x8x8xf32>) outs(%arg2 : memref<1x8x8x8xf32>)
return
}
+// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
+
+// CHECK: @generalize_linalg_map
+
+// CHECK: linalg.generic
+// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP0]], #[[MAP0]]]
+// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
+// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x8x8x8xf32>, memref<1x8x8x8xf32>) outs(%{{.+}} : memref<1x8x8x8xf32>
+// CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32)
+// CHECK: %[[ADD:.+]] = arith.addf %[[BBARG0]], %[[BBARG1]] : f32
+// CHECK: linalg.yield %[[ADD]] : f32
+
// -----
func.func @generalize_add(%lhs: memref<7x14x21xf32>, %rhs: memref<7x14x21xf32>,
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
index 40bf4d19d6b9..fabc8e610612 100644
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -681,7 +681,7 @@ func.func @map_binary_wrong_yield_operands(
%add = linalg.map
ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>)
outs(%init:tensor<64xf32>)
- (%lhs_elem: f32, %rhs_elem: f32) {
+ (%lhs_elem: f32, %rhs_elem: f32, %out: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem: f32
// expected-error @+1{{'linalg.yield' op expected number of yield values (2) to match the number of inits / outs operands of the enclosing LinalgOp (1)}}
linalg.yield %0, %0: f32, f32
@@ -694,11 +694,11 @@ func.func @map_binary_wrong_yield_operands(
func.func @map_input_mapper_arity_mismatch(
%lhs: tensor<64xf32>, %rhs: tensor<64xf32>, %init: tensor<64xf32>)
-> tensor<64xf32> {
- // expected-error@+1{{'linalg.map' op expects number of operands to match the arity of mapper, but got: 2 and 3}}
+ // expected-error@+1{{'linalg.map' op expects number of operands to match the arity of mapper, but got: 3 and 4}}
%add = linalg.map
ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>)
outs(%init:tensor<64xf32>)
- (%lhs_elem: f32, %rhs_elem: f32, %extra_elem: f32) {
+ (%lhs_elem: f32, %rhs_elem: f32, %out: f32, %extra_elem: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem: f32
linalg.yield %0: f32
}
@@ -714,7 +714,7 @@ func.func @map_input_mapper_type_mismatch(
%add = linalg.map
ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>)
outs(%init:tensor<64xf32>)
- (%lhs_elem: f64, %rhs_elem: f64) {
+ (%lhs_elem: f64, %rhs_elem: f64, %out: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem: f64
linalg.yield %0: f64
}
@@ -730,7 +730,7 @@ func.func @map_input_output_shape_mismatch(
%add = linalg.map
ins(%lhs, %rhs : tensor<64x64xf32>, tensor<64x64xf32>)
outs(%init:tensor<32xf32>)
- (%lhs_elem: f32, %rhs_elem: f32) {
+ (%lhs_elem: f32, %rhs_elem: f32, %out: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem: f32
linalg.yield %0: f32
}
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
index 1df15e85bac1..85cc1ffc2029 100644
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
@@ -339,7 +339,7 @@ func.func @map_binary(%lhs: tensor<64xf32>, %rhs: tensor<64xf32>,
%add = linalg.map
ins(%lhs, %rhs: tensor<64xf32>, tensor<64xf32>)
outs(%init:tensor<64xf32>)
- (%lhs_elem: f32, %rhs_elem: f32) {
+ (%lhs_elem: f32, %rhs_elem: f32, %out: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem: f32
linalg.yield %0: f32
}
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir
index 563013d4083a..74928920c695 100644
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -341,7 +341,7 @@ func.func @mixed_parallel_reduced_results(%arg0 : tensor<?x?x?xf32>,
func.func @map_no_inputs(%init: tensor<64xf32>) -> tensor<64xf32> {
%add = linalg.map
outs(%init:tensor<64xf32>)
- () {
+ (%out: f32) {
%0 = arith.constant 0.0: f32
linalg.yield %0: f32
}
@@ -349,7 +349,7 @@ func.func @map_no_inputs(%init: tensor<64xf32>) -> tensor<64xf32> {
}
// CHECK-LABEL: func @map_no_inputs
// CHECK: linalg.map outs
-// CHECK-NEXT: () {
+// CHECK-NEXT: (%[[OUT:.*]]: f32) {
// CHECK-NEXT: arith.constant
// CHECK-NEXT: linalg.yield
// CHECK-NEXT: }
@@ -361,7 +361,7 @@ func.func @map_binary(%lhs: tensor<64xf32>, %rhs: tensor<64xf32>,
%add = linalg.map
ins(%lhs, %rhs: tensor<64xf32>, tensor<64xf32>)
outs(%init:tensor<64xf32>)
- (%lhs_elem: f32, %rhs_elem: f32) {
+ (%lhs_elem: f32, %rhs_elem: f32, %out: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem: f32
linalg.yield %0: f32
}
@@ -378,7 +378,7 @@ func.func @map_binary_memref(%lhs: memref<64xf32>, %rhs: memref<64xf32>,
linalg.map
ins(%lhs, %rhs: memref<64xf32>, memref<64xf32>)
outs(%init:memref<64xf32>)
- (%lhs_elem: f32, %rhs_elem: f32) {
+ (%lhs_elem: f32, %rhs_elem: f32, %out: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem: f32
linalg.yield %0: f32
}
@@ -393,7 +393,7 @@ func.func @map_unary(%input: tensor<64xf32>, %init: tensor<64xf32>) -> tensor<64
%abs = linalg.map
ins(%input:tensor<64xf32>)
outs(%init:tensor<64xf32>)
- (%input_elem: f32) {
+ (%input_elem: f32, %out: f32) {
%0 = math.absf %input_elem: f32
linalg.yield %0: f32
}
@@ -408,7 +408,7 @@ func.func @map_unary_memref(%input: memref<64xf32>, %init: memref<64xf32>) {
linalg.map
ins(%input:memref<64xf32>)
outs(%init:memref<64xf32>)
- (%input_elem: f32) {
+ (%input_elem: f32, %out: f32) {
%0 = math.absf %input_elem: f32
linalg.yield %0: f32
}
@@ -604,7 +604,7 @@ func.func @map_arith_with_attr(%lhs: tensor<64xf32>, %rhs: tensor<64xf32>,
%add = linalg.map
ins(%lhs, %rhs: tensor<64xf32>, tensor<64xf32>)
outs(%init:tensor<64xf32>)
- (%lhs_elem: f32, %rhs_elem: f32) {
+ (%lhs_elem: f32, %rhs_elem: f32, %out: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem fastmath<fast> : f32
linalg.yield %0: f32
}
@@ -622,7 +622,7 @@ func.func @map_arith_with_attr(%lhs: tensor<64xf32>, %rhs: tensor<64xf32>,
func.func @map_not_short_form_compatible(%lhs: tensor<1x32xf32>, %rhs: tensor<1x32xf32>, %init: tensor<1x32xf32>) -> tensor<1x32xf32> {
%mapped = linalg.map ins(%lhs, %rhs : tensor<1x32xf32>, tensor<1x32xf32>) outs(%init : tensor<1x32xf32>)
- (%in_1: f32, %in_2: f32) {
+ (%in_1: f32, %in_2: f32, %out: f32) {
%1 = arith.maximumf %in_1, %in_2 : f32
linalg.yield %in_1 : f32
}
@@ -634,7 +634,7 @@ func.func @map_not_short_form_compatible(%lhs: tensor<1x32xf32>, %rhs: tensor<1x
// CHECK-NOT: linalg.map { arith.maximumf } ins(%[[LHS]] : tensor<1x32xf32>
// CHECK: linalg.map ins(%[[LHS]], %[[RHS]] : tensor<1x32xf32>, tensor<1x32xf32>)
// CHECK-SAME: outs(%[[INIT]] : tensor<1x32xf32>)
-// CHECK-NEXT: (%[[IN1:.*]]: f32, %[[IN2:.*]]: f32) {
+// CHECK-NEXT: (%[[IN1:.*]]: f32, %[[IN2:.*]]: f32, %[[OUT:.*]]: f32) {
// CHECK-NEXT: %[[MAX_RESULT:.*]] = arith.maximumf %[[IN1]], %[[IN2]] : f32
// CHECK-NEXT: linalg.yield %[[IN1]] : f32
// CHECK-NEXT: }
diff --git a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir
index 93a03369be23..aa2c1da4b627 100644
--- a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir
@@ -356,7 +356,7 @@ func.func @vectorize_map(%arg0: memref<64xf32>,
%arg1: memref<64xf32>, %arg2: memref<64xf32>) {
linalg.map ins(%arg0, %arg1 : memref<64xf32>, memref<64xf32>)
outs(%arg2 : memref<64xf32>)
- (%in: f32, %in_0: f32) {
+ (%in: f32, %in_0: f32, %out: f32) {
%0 = arith.addf %in, %in_0 : f32
linalg.yield %0 : f32
}
diff --git a/mlir/test/Dialect/Tensor/bufferize.mlir b/mlir/test/Dialect/Tensor/bufferize.mlir
index 296ca02564e3..5eb2360a29b8 100644
--- a/mlir/test/Dialect/Tensor/bufferize.mlir
+++ b/mlir/test/Dialect/Tensor/bufferize.mlir
@@ -728,7 +728,7 @@ func.func @tensor.concat_dynamic_nonconcat_dim(%f: tensor<?x?xf32>, %g: tensor<?
// CHECK-DAG: %[[ALLOC:.*]] = memref.alloc(%[[M]], %[[N]]) {{.*}} : memref<?x3x?xf32>
// CHECK: %[[ALLOC_T:.*]] = bufferization.to_tensor %[[ALLOC]]
// CHECK: %[[MAPPED:.*]] = linalg.map outs(%[[ALLOC_T]] : tensor<?x3x?xf32>)
-// CHECK: () {
+// CHECK: (%[[INIT:.*]]: f32) {
// CHECK: linalg.yield %[[F]] : f32
// CHECK: }
// CHECK: return %[[MAPPED]] : tensor<?x3x?xf32>
diff --git a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir
index 8cbee3cbb758..aa8882d21698 100644
--- a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir
@@ -257,10 +257,10 @@ module attributes {transform.with_named_sequence} {
// -----
func.func @map(%lhs: memref<64xf32>,
- %rhs: memref<64xf32>, %out: memref<64xf32>) {
+ %rhs: memref<64xf32>, %init: memref<64xf32>) {
linalg.map ins(%lhs, %rhs : memref<64xf32>, memref<64xf32>)
- outs(%out : memref<64xf32>)
- (%in: f32, %in_0: f32) {
+ outs(%init : memref<64xf32>)
+ (%in: f32, %in_0: f32, %out: f32) {
%0 = arith.addf %in, %in_0 : f32
linalg.yield %0 : f32
}
diff --git a/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp b/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp
index 496f18bc49fa..61db9d2b4446 100644
--- a/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp
+++ b/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp
@@ -797,7 +797,7 @@ DiagnosedSilenceableFailure mlir::test::TestProduceInvalidIR::applyToOne(
// Provide some IR that does not verify.
rewriter.setInsertionPointToStart(&target->getRegion(0).front());
TestDummyPayloadOp::create(rewriter, target->getLoc(), TypeRange(),
- ValueRange(), /*failToVerify=*/true);
+ ValueRange(), /*fail_to_verify=*/true);
return DiagnosedSilenceableFailure::success();
}